<?xml version="1.0" encoding="UTF-8"?>
<!-- $Id: web-fall09.xml 1182 2009-12-03 22:00:33Z dret $ -->
<?hotspot layout-path="hotspot/hotspot/layout" ?>
<?hotspot kilauea-path="hotspot/kilauea" ?>
<?hotspot layout="ischool" ?>
<hotspot xmlns="http://dret.net/xmlns/hotspot/1" xmlns:hotspot="http://dret.net/xmlns/hotspot/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://dret.net/xmlns/hotspot/1 hotspot/hotspot/schemas/hotspot.xsd">
	<configuration>
		<link subsections="yes" bookmarks="yes" versions="web-fall09.xml" home="./" help="quick" contents="./" glossary="http://dret.net/glossary/" author="http://dret.net/netdret/"/>
		<paths img="img" listing="src"/>
		<outline count-text=" [*]" count-depth="all"/>
		<hyperlink extra=""/>
		<extension file="html" link=""/>
		<counter separator=":&#160;"/>
	</configuration>
	<license uri="http://creativecommons.org/licenses/by/3.0/" short="CC 3.0">
		<div class="license">
			<p><a rel="license" title="view full text of license" href="http://creativecommons.org/licenses/by/3.0/"><img alt="Creative Commons License" src="hotspot/hotspot/layout/ischool/ischool/somerights20.png" border="0" height="31" width="88"/></a></p>
			<p><a class="outlink" rel="license" title="view full text of license" href="http://creativecommons.org/licenses/by/3.0/">This work is licensed under a CC<br/>Attribution 3.0 Unported License</a></p>
		</div>
	</license>
    <title short="Web Architecture"><a href="./" title="Course Homepage">Web Architecture</a><br/>Fall 2009 &#x2014; INFO 290 (CCN 42593)</title>
    <author short="E. Wilde" affiliation="UC Berkeley ISchool"><a href="http://dret.net/netdret/" title="dret.net">Erik Wilde</a></author>
    <affiliation short="UC Berkeley ISchool"><a href="http://www.berkeley.edu/" title="University of California, Berkeley">UC Berkeley</a> <a href="http://ischool.berkeley.edu/" title="ISchool">School of Information</a></affiliation>
    <date short="Fall 2009">Fall Semester 2009</date>
    <copyright>2009 Erik Wilde</copyright>
    <categories>
        <category element="xml" class="xml" name="XML"/>
        <category element="elem" class="xml elem" name="XML Element"/>
        <category element="html" class="html" name="HTML"/>
        <category element="htmla" class="html" name="HTML Attribute"/>
        <category element="htmel" class="html elem" name="HTML Element"/>
        <category element="cssp" class="css" name="CSS Property"/>
        <category element="csss" class="css" name="CSS Selector"/>
        <category element="css" class="css" name="CSS"/>
        <category element="xpathf" class="xpath" name="XPath Function"/>
        <category element="xpath" class="xpath" name="XPath"/>
        <category element="xslte" class="xslt elem" name="XSLT Element"/>
        <category element="xslta" class="xslt" name="XSLT Attribute"/>
        <category element="xslt" class="xslt" name="XSLT"/>
        <category element="xsde" class="xsd elem" name="XSD Element"/>
        <category element="xsda" class="xsd" name="XSD Attribute"/>
        <category element="xsd" class="xsd" name="XSD"/>
        <category element="uri" class="uri" name="URI"/>
        <category element="http" class="http" name="HTTP"/>
        <category element="mime" class="mime" name="MIME"/>
        <category element="atom" class="atom" name="Atom"/>
    </categories>
	<toc name="toc.html">
		<table rules="all" cellspacing="0" cellpadding="5" width="100%">
			<thead>
				<tr>
					<th valign="bottom">Date</th>
					<th valign="bottom">Subject</th>
					<th valign="bottom">Slides</th>
					<th valign="bottom">Required Reading</th>
					<th valign="bottom">Additional Resources</th>
					<th valign="bottom"><a href="a/">Assignments</a></th>
				</tr>
			</thead>
			<tbody>
				<hotspot:for-each-presentation>
					<tr>
						<td align="right" valign="top"><hotspot:date/></td>
						<td valign="top"><b><hotspot:title/><span class="toggle">:</span></b> <span class="toggle"><span class="abstract"><hotspot:toc class="abstract"/></span></span></td>
						<td align="center"><hotspot:presentation-link title="Lecture Slides"><hotspot:title form="short"/></hotspot:presentation-link> <span class="toggle"><hotspot:slides>(*&#160;Slides)</hotspot:slides></span></td>
						<td align="center"><hotspot:toc class="reading"/></td>
						<td align="center"><hotspot:toc class="resources"/></td>
						<td align="center"><hotspot:toc class="assignment"/></td>
					</tr>
				</hotspot:for-each-presentation>
			</tbody>
		</table>
	</toc>
    <presentation id="intro">
        <title short="Introduction">Overview and Introduction</title>
        <date>2009-08-27</date>
        <toc class="abstract">This introductory lecture gives the motivation for the course, some information about the people involved and the organization of the course, a high-level overview of the course's topics, and an overview of the assignments which are an important part of the course program.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Building Things That Actually Work</title>
			<ul>
				<li>Building things …</li>
				<ul>
					<li>building is designing (with a lot of implicit design decisions)</li>
					<li>building things uncovers an essential set of constraints</li>
					<li>changing things should not be too hard (if they are well-designed)</li>
				</ul>
				<li>… that actually work</li>
				<ul>
					<li><q>work</q> means more than <q>look good for the final demo</q></li>
					<li>how adaptable is an application to a changing environment?</li>
					<li>how easy is it to integrate new input and output channels?</li>
					<li>how easily can it be extended to meet new requirements?</li>
				</ul>
				<li>System design is (an important) part of design</li>
				<ul>
					<li>isolated design approaches (for example, UI only) will not deliver the best solutions</li>
					<li><a href="http://www.ischool.berkeley.edu/programs/courses/290-ISaSDSMaM">an <q>end-to-end view</q> involves the complete view of a system</a></li>
				</ul>
				<li>Start building things as early as possible</li>
			</ul>
		</slide>
		<slide>
			<title>What is Architecture?</title>
			<table width="95%">
				<tr>
					<td>
						<img style="width : 90% ; margin : 2% ; " src="map-newyork.png" title="New York City" href="http://maps.google.com/maps?ie=UTF8&amp;hl=en&amp;ll=40.75792,-73.982191&amp;spn=0.049281,0.093641&amp;z=14"/>
					</td>
					<td>
						<img style="width : 90% ; margin : 2% ; " src="map-luebeck.png" title="Lübeck" href="http://maps.google.com/maps?ie=UTF8&amp;hl=en&amp;ll=53.866447,10.68974&amp;spn=0.019182,0.046821&amp;z=15"/>
					</td>
				</tr>
			</table>
		</slide>
		<slide>
			<title>Architecture vs. Design</title>
			<img style="width : 90% ; margin : 2% ; " src="rooftop-pool.jpg" title="Nice Design, Expensive Architecture" href="http://www.starwoodhotels.com/luxury/property/overview/index.html?propertyID=3321"/>
		</slide>
		<slide>
			<title>What is an Architect?</title>
			<img style="float : right ; margin-right : 2em ; " src="gherkin.jpg" title="London Gherkin" href="http://www.fosterandpartners.com/Projects/1004/Default.aspx"/>
			<ul>
				<li><q>Star Architects</q> are not typical</li>
				<ul>
					<li>they sell brand names and deliver high profile results</li>
					<li>most architects are more modest and less visible</li>
				</ul>
				<li>Architects must understand how things work</li>
				<ul>
					<li>a reasonable understanding of the disciplines involved</li>
					<li>an excellent understanding of how disciplines interact</li>
					<li>negotiating between specialists for a good overall design</li>
				</ul>
				<li>Architects are guides</li>
				<ul>
					<li>they provide guidance for going in the right direction</li>
					<li>they can tell why this direction is the right direction</li>
					<li>they can explain why a wrong direction is wrong</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>How to become a Web Architect?</title>
			<ul>
				<li>Understand Web technologies and their dependencies</li>
				<ul>
					<li>no need to become an expert in all of the areas</li>
					<li>the important part is understanding the dependencies</li>
				</ul>
				<li>Understand how to compare application architectures</li>
				<ul>
					<li>there is no <q>best solution</q> for any given problem</li>
					<li>every solution must be evaluated in terms of <em>various constraints</em></li>
				</ul>
				<li>Next steps for your career in Web architecture</li>
				<ul>
					<li>understand how the <a href="../xml-fall09/" title='"XML Foundations" course fall 2009'>back-end plumbing (a.k.a. XML)</a> works in detail</li>
					<li>get involved in <a href="http://isd.ischool.berkeley.edu/project/" title="ISD Clinic project overview">real-world projects</a> in the <a href="http://isd.ischool.berkeley.edu/about/clinic" title="ISD Clinic project overview">ISD Clinic</a></li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Course Setup</title>
			<ul>
				<li>Broad overview of core Web technologies</li>
				<ul>
					<li>this is <em>not</em> a Web design or Web programming course</li>
				</ul>
				<li><a href="a/">Assignments</a> working with various Web technologies</li>
				<ul>
					<li>how to deliver client-specific content</li>
					<li>how to design client-specific styles</li>
					<li>using Ajax for creating more dynamic Web pages</li>
					<li>repurposing existing content for syndication</li>
				</ul>
				<li>Grading is based on assignments and final exam (30min oral)</li>
				<ul>
					<li><a href="a/">the assignments</a> cover most of the topics in the course</li>
					<li>all assignments have an optional part for extra credit</li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Motivation</title>
			<slide>
				<title>Closed World Assumption</title>
				<blockquote>If the only tool you have is a hammer, you tend to see every problem as a nail.</blockquote>
				<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Abraham_Maslow">Abraham Maslow</a></p>
				<ul>
					<li>People, such as content creators, typically are lazy</li>
					<ul>
						<li>developing content and code for diverse users and clients is hard</li>
						<li>by making assumptions, this job can become considerably easier</li>
					</ul>
					<li>Tools often hide complexity and/or take away freedom</li>
					<ul>
						<li>they are good if tool users <em>know what they are doing</em></li>
						<li>tool users should <em>know alternatives and when to switch tools</em></li>
					</ul>
					<li>Tool makers provide support for lazy people</li>
					<ul>
						<li>built-in simplifications of the tool's target technology</li>
						<li>pre-packaged excuses why it is appropriate to use the tool</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Your Tax $ @ Work</title>
				<ul>
					<li><a href="http://www.grants.gov/applicants/apply_for_grants.jsp">How to apply for NSF grants</a></li>
					<li>Over $400 billion in grants each year</li>
					<li><em>PureEdge</em> is <em>required</em> as the technology to fill out grant forms</li>
						<ul>
							<li><a href="http://www-306.ibm.com/software/swnews/swnews.nsf/n/nhan6eerne">acquired by IBM</a> and now called <em href="http://www-142.ibm.com/software/workplace/products/product5.nsf/wdocs/formshome">IBM Workplace Forms</em></li>
						</ul>
					<li>All of this probably looked nice for the final demo …</li>
					<li>Web forms do not provide all the features required by the specification</li>
					<ul>
						<li>offline editing of applications is not possible, Web forms work online only</li>
						<li>there is no built-in signing of form contents, but there are technologies for it</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Version 1: Go IE or Go Home</title>
				<ul>
					<li>Well, but not if you are using Vista …</li>
					<li>PureEdge is an IE plug-in for filling out forms online and offline</li>
					<ul>
						<li>plug-ins are specific for the browser for which they are developed</li>
						<li>plug-ins are specific for the OS on which they run</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Version 2: Buy a Virtual Computer</title>
				<ul>
					<li>Government authorities are (usually) concerned about accessibility</li>
					<ul>
						<li>restricting $400 billion of grant money to IE users only seems a bit restrictive</li>
						<li>is there a <em>reasonable argument</em> to be made for this restriction?</li>
					</ul>
					<li>Grants.gov recommended getting a virtual PC to access the portal</li>
					<ul>
						<li>users have to buy virtual PC software</li>
						<li>users have to buy Windows to run on the virtual PC</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Version 3: Use a Virtual Computer</title>
				<ul>
					<li>Grants.gov set up a <em>Citrix server</em> for grant applicants</li>
					<ul>
						<li>Citrix server licenses are not cheap to buy</li>
						<li>applicants still have to install the Citrix client (which is free)</li>
						<li>running a Citrix server farm is pretty expensive</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Version 4: Crash your Computer</title>
				<ul>
					<li>After some time, <a href="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">PureEdge for Mac was released</a>, features include:</li>						
					<ul>
						<li><q cite="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">occasional crashes and subsequent loss of any unsaved data</q></li>
						<li><q cite="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">inability to run on Mac OS version prior to 10.4.6</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Classical Lock-In</title>
				<ul>
					<li>Companies usually sell <em>products</em>, not just <em>solutions</em></li>
					<li>Lock-in happens quickly and is hard to escape from later</li>
					<li>Lock-in usually carries a pretty high price tag</li>
					<li>Lock-in solutions can be good, but it is an important decision</li>
					<li>Standards-based solutions may lack some sophistication</li>
					<ul>
						<li>but often they may still be good enough to solve a problem</li>
						<li>being able to change the platform easily is a valuable asset</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Varia</title>
			<slide>
				<title>About Me</title>
				<ul>
					<li>Computer Science at <a href="http://www.tu-berlin.de/eng/">Technical University of Berlin (TUB)</a> (88-91)</li>
					<li>Ph.D. at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> (92-97)</li>
					<li>Post-Doc at <a href="http://www.icsi.berkeley.edu/" title="International Computer Science Institute">ICSI, Berkeley</a> (97/98)</li>
					<ul>
						<li>book on <q><a href="http://dret.net/netdret/publications#wil98">Technical Foundations of the World Wide Web</a></q></li>
					</ul>
					<li>Various activities in Switzerland (98-06)</li>
					<ul>
						<li>teaching at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> and <a href="http://www.fhnw.ch/">FHNW</a></li>
						<li>working as independent consultant (training, courses, consulting)</li>
						<li>research in <a href="http://dret.net/projects/">various XML-related areas</a></li>
					</ul>
					<li>Professor at the <a href="http://ischool.berkeley.edu/">School of Information</a> (since Fall 2006)</li>
					<ul>
						<li>Technical Director of the <a href="http://isd.ischool.berkeley.edu/">Information and Service Design (ISD) program</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About this Course</title>
				<ul>
					<li>Course Web page: <code><a href="./">http://dret.net/lectures/web-fall09/</a></code></li>
					<li>Course mailing list: <code><a href="mailto:web-fall09@bspace.berkeley.edu">web-fall09@bspace.berkeley.edu</a></code></li>
					<ul>
						<li>archived in the <a href="https://bspace.berkeley.edu/portal/site/8a9055d1-74ba-4cc9-a4a1-19d7f6d7b16f/page/acb4a4ee-7471-48bd-b308-19c4e2e7233c">bspace email archive</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About these Slides</title>
				<ul>
					<li>Generated from <a href="http://dret.net/projects/xslidy/">Hotspot</a> <a href="web-fall09.xml">XML</a></li>
					<li>Designed for online presentation and use (lots of links!)</li>
					<ul>
						<li>Firefox <a href="http://dret.typepad.com/dretblog/2008/07/go-up.html">Go Up</a> allows easy navigation up one level</li>
						<li>Firefox <a href="https://addons.mozilla.org/en-US/firefox/addon/1949">Site Navigation Bar</a> supports navigation of links</li>
						<li>Firefox <a href="https://addons.mozilla.org/en-US/firefox/addon/2933">Link Widgets</a> requires a bit more configuration (more flexibility)</li>
						<li>for printing, use <q>a</q> (all slides), and then <q>s</q> (smaller font) a couple of times</li>
					</ul>
					<li>A good real-world example for Web-based publishing</li>
					<ul>
						<li>Slidy/Kilauea is useful, but there is no support for structures and hyperlinking</li>
						<li>Hotspot adds these features by adding an XSLT transformation</li>
						<li>Hotspot is useful, but there is no interface (XML editing only)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Additional Resources</title>
				<ul>
					<li><a href="http://dret.net/glossary/">Online Glossary at <code>http://dret.net/glossary/</code></a></li>
					<ul>
						<li>suggestions, updates, corrections are very welcome (really!)</li>
						<li>another exercise in how to use XML and XSLT for information management</li>
					</ul>
					<li><a href="http://dret.net/biblio/">Bibliography at <code>http://dret.net/biblio/</code></a></li>
					<ul>
						<li>suggestions, updates, corrections are very welcome (really!)</li>
						<li>produced by an XML-centric system for managing bibliography data</li>
					</ul>
					<li>The <a href="http://www.w3.org/"><em>World Wide Web Consortium (W3C)</em></a></li>
					<ul>
						<li>headed by <em href="http://www.w3.org/People/Berners-Lee/">Tim Berners-Lee</em>, inventor of the Web (with <a href="http://en.wikipedia.org/wiki/Robert_Cailliau">Robert Cailliau</a>)</li>
					</ul>
					<li>The <a href="http://www.ietf.org/"><em>Internet Engineering Task Force (IETF)</em></a></li>
					<ul>
						<li>mainly Internet standards, but also responsible for URIs and HTTP (and Atom)</li>
					</ul>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="browsers">
        <title short="Browsers">Web Browsers</title>
        <date>2009-09-01</date>
        <toc class="reading"><a href="http://en.wikipedia.org/wiki/Web_Browser" title="Wikipedia: Web Browser">Wikipedia</a>&#160;· <a href="http://en.wikipedia.org/wiki/History_of_the_web_browser" title="Wikipedia: History of the Web Browser">History</a>&#160;· <a href="http://www.youtube.com/watch?v=o4MwTvtyrUQ" title="YouTube Video: What is a Browser?">YouTube</a></toc>
        <toc class="resources"><a href="http://www.mozilla.com/firefox/">Firefox</a>&#160;· <a href="http://www.apple.com/safari/">Safari</a>&#160;· <a href="http://www.microsoft.com/windows/products/winfamily/ie/default.mspx">IE</a>&#160;· <a href="http://www.google.com/chrome">Chrome</a>&#160;· <a href="http://www.opera.com/">Opera</a></toc>
        <toc class="abstract">This lecture looks at <em>Web browsers</em> and how they work. It introduces the basic functionalities of a browser: retrieval and rendering of Web pages. Any modern browser needs to support more than just HTTP and HTML; it must support CSS for stylesheets, JavaScript for scripted Web pages, various image formats, and popular applications such as Flash. In addition, browsers can support additional functionality such as off-line operation, or in general more application-oriented features such as <em>AIR</em> or <em>Silverlight</em>.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
        <part>
			<title>Browser Basics</title>
			<slide>
				<title>What is a Web Browser?</title>
				<ul>
					<li>Network access (HTTP, HTTPS, FTP, file system, …)</li>
					<li>Rendering HTML layout (a subset of CSS layout)</li>
					<ul>
						<li>CSS specifies many more features</li>
					</ul>
					<li>Handling special HTML in the required way</li>
					<ul>
						<li>images (in various formats) must be downloaded and embedded</li>
						<li>forms must be rendered and form data must be submitted</li>
					</ul>
					<li>Running scripts and providing them access to the page</li>
					<ul>
						<li>re-rendering when scripts change the page (DHTML)</li>
						<li>providing scripts with network access (Ajax)</li>
					</ul>
					<li>Utility functions to make the browser more usable</li>
					<ul>
						<li>tabs and bookmarks for more organized browsing</li>
						<li>security policies for safer browsing</li>
						<li>additional content types may be supported (by external software)</li>
						<li>the browser may be extended (add-ons)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>One Minute in the Life of a Browser</title>
				<ol>
					<li>Analyze URI and connect to server to retrieve resource</li>
					<ul>
						<li>recursively repeat until all required resources are retrieved</li>
					</ul>
					<li>Analyze HTML, correct errors, and compute a <em>DOM tree</em></li>
					<ul>
						<li>DOM is a memory representation of the HTML markup</li>
					</ul>
					<li>Apply CSS and compute the layout of the styled DOM tree</li>
					<ul>
						<li>compute CSS decorated DOM and apply formatting algorithm to it</li>
					</ul>
					<li>Start executing <link href="scripting"/> code and change the DOM as required</li>
					<ul>
						<li>scripting may have initial phase and user interaction phase</li>
					</ul>
					<li>Continue executing scripting code in response to user interactions</li>
					<ul>
						<li>for many dynamic Web pages, this is a continuous activity</li>
					</ul>
					<li>If the user clicks on a link, start all over again</li>
				</ol>
			</slide>
			<slide>
				<title>Browsers, Apps, Operating Systems</title>
				<ul>
					<li>Traditionally, a browser is an application for an OS</li>
					<ul>
						<li>loading page descriptions and rendering the pages</li>
						<li>the first browsers did not execute any code</li>
					</ul>
					<li><link href="scripting"/> and Plug-ins changed the browser into a platform</li>
					<ul>
						<li>browsers are code (the browser code) executing downloaded code</li>
					</ul>
					<li>Browsers are becoming increasingly feature-rich</li>
					<ul>
						<li><code>XMLHttpRequest</code> allows script/server communications</li>
						<li><a href="http://www.w3.org/html/wg/">HTML5</a> adds WebStorage, WebSockets, and more OS-like features</li>
					</ul>
					<li>Browsers could become the only app to run on hardware</li>
					<ul>
						<li>Google's <a href="http://googleblog.blogspot.com/2009/07/introducing-google-chrome-os.html">ChromeOS</a> may become the first <q>browser OS</q></li>
						<li>Palm is going a different route with <a href="http://en.wikipedia.org/wiki/WebOS">webOS</a> (JavaScript apps)</li>
						<li>WebOS and ChromeOS are essentially the same (rich JavaScript runtimes)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Browser Usage</title>
				<img src="web-browser-usage.png" style="float : right ; width : 40% ; margin : 0 1em 2em 2em ; " href="http://en.wikipedia.org/wiki/Usage_share_of_web_browsers" title="Browser Usage"/>
				<p><span style="border:none;background-color:#0000ff;color:#0000ff;" class="aide" title="#0000ff">██</span>&#160;Internet Explorer (69.80%)</p>
				<p><span style="border:none;background-color:#ff9900;color:#ff9900;" class="aide" title="#ff9900">██</span>&#160;Mozilla Firefox (20.66%)</p>
				<p><span style="border:none;background-color:#cccccc;color:#cccccc;" class="aide" title="#cccccc">██</span>&#160;Safari (7.18%)</p>
				<p><span style="border:none;background-color:#ffff00;color:#ffff00;" class="aide" title="#ffff00">██</span>&#160;Chrome (0.87%)</p>
				<p><span style="border:none;background-color:#ff0000;color:#ff0000;" class="aide" title="#ff0000">██</span>&#160;Opera (0.72%)</p>
				<p><span style="border:none;background-color:#66cc33;color:#66cc33;" class="aide" title="#66cc33">██</span>&#160;Netscape (0.52%)</p>
				<p><span style="border:none;background-color:#00ffff;color:#00ffff;" class="aide" title="#00ffff">██</span>&#160;Other (0.25%)</p>
			</slide>
			<slide>
				<title>Browsers and CSS</title>
				<ul>
					<li>Browsers have their own <em>built-in CSS code</em></li>
					<ul>
						<li>HTML pages with no CSS are still formatted in some way</li>
						<li>HTML pages can provide their own CSS to change defaults</li>
						<li>users can change the browser's default to their own preferences</li>
					</ul>
					<li>CSS has a <a href="http://www.w3.org/TR/CSS21/cascade.html#cascading-order">well-defined way of how stylesheets are combined</a></li>
					<ol>
						<li>browser defaults</li>
						<li>user declarations</li>
						<li>page declarations</li>
						<li>page <css href="http://www.w3.org/TR/CSS21/cascade.html#important-rules">important</css> declarations</li>
						<li>user <css href="http://www.w3.org/TR/CSS21/cascade.html#important-rules">important</css> declarations</li>
					</ol>
					<li>Rendering of HTML/CSS depends on a variety of factors</li>
					<ul>
						<li>default settings of the browser</li>
						<li>preferences set by the user</li>
						<li>CSS code provided by the page author</li>
						<li>HTML/CSS capabilities of the browser</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Browsers and the Internet</title>
				<p>Before retrieving the Web page <uri href="http://www.berkeley.edu/">http://www.berkeley.edu/</uri>, the browser first has to find out the <link href="ip">IP </link>address of the <code>www.berkeley.edu</code> server. Using this address, it can then open an <link href="http">HTTP</link> connection. The lookup service used by the browser is the <link href="dns"/>.</p>
				<img style="width : 90% ; margin : 2% ; " src="browser-dns-http.png"/>
			</slide>
			<slide>
				<title>Supported URI Schemes</title>
				<ul>
					<li>Most Web pages are available over <link href="http">HTTP</link></li>
					<ul>
						<li>one popular exception is pages available over <link href="https">HTTPS</link></li>
					</ul>
					<li>Most browsers support more than just the HTTP and HTTPS <link href="uri-schemes"/></li>
					<ul>
						<li><uri>http:</uri> and <uri>https:</uri> are necessary (these are the Web protocols)</li>
						<li><uri href="http://en.wikipedia.org/wiki/File_URI_scheme">file:</uri> allows the browser to load local files</li>
						<li><uri>ftp:</uri> is useful because many documents are available on FTP servers</li>
						<li><uri>mailto:</uri> usually is not built into the browser (the mail tool is started)</li>
						<li><uri>tel:</uri> is a useful scheme for devices with telephone functionality</li>
					</ul>
					<li>Firefox 3 allows the <a href="https://developer.mozilla.org/en/Web-based_protocol_handlers">registration of protocol handlers</a></li>
				</ul>
			</slide>
			<slide>
				<title>Caching</title>
				<ul>
					<li>Browsers retrieve resources for rendering Web pages</li>
					<li>In a typical user session, many resources are used repeatedly</li>
					<ul>
						<li>using the browser's <q>back</q> button</li>
						<li>accessing pages reusing the same CSS or images</li>
					</ul>
					<li><em>Caching</em> is a frequently used optimization in computer systems</li>
					<ol>
						<li>store retrieved data locally</li>
						<li>reuse that data when it is used again instead of fetching it again</li>
						<li>the hard (and important) part is <em>cache invalidation</em></li>
					</ol>
					<li><em href="http://en.wikipedia.org/wiki/Link_prefetching">Prefetching</em> allows browsers to load pages in advance</li>
					<ul>
						<li>predicting user behavior usually is hard or impossible</li>
						<li>unnecessary prefetching generates unnecessary load on servers and the network</li>
						<li>faster networks make load delays less painful</li>
						<pre>&lt;link rel="prefetch" href="http://www.example.com/"></pre>
					</ul>
					<li><a href="http://en.wikipedia.org/wiki/Google_Web_Accelerator">Google Web Accelerator</a> was a Google-specific approach</li>
					<ul>
						<li>using prefetching and compression (connecting to a Google cache)</li>
						<li>serious privacy implications (<em>all</em> traffic is routed through Google)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Security and Privacy</title>
				<ul>
					<li>Browsers store a lot of security-sensitive data</li>
					<ul>
						<li>data entered in forms is stored for future visits</li>
						<li>authentication credentials (<link href="cookies"/>) are stored on behalf of servers</li>
						<li>the browsing history of visited pages is stored</li>
                        <li>passwords are stored in password managers</li>
					</ul>
					<li>Connecting to HTTPS Web sites requires a certificate validity check</li>
					<ul>
						<li>browsers come with a large set of pre-installed <em>certification authorities</em></li>
						<li>users implicitly trust this list of pre-installed authorities</li>
					</ul>
					<li>Browsers provide control over these features in complicated settings</li>
					<li>Browsers start providing more user-friendly <q>private modes</q></li>
					<ul>
						<li>Safari calls the feature <em>private browsing</em></li>
						<li>IE8 has an <a href="http://www.microsoft.com/windows/internet-explorer/beta/features/browse-privately.aspx">InPrivate</a> mode</li>
						<li>Firefox 3.1 includes <em>Private Browsing</em></li>
					</ul>
					<li>Security/Privacy (as always) is a trade-off with convenience</li>
				</ul>
			</slide>
			<slide>
				<title>Browsers and Scripting</title>
				<ul>
					<li><link href="scripting"/> is essential for most modern Web pages</li>
					<ul>
						<li>well-designed Web pages also work when scripting is turned off</li>
						<li>many Web pages are not designed all that well</li>
						<li>when scripting is turned on, behavior should be predictable and consistent</li>
					</ul>
					<li>Scripting problems plagued Web developers for a long time</li>
					<ul>
						<li>major parts of Web development go into ensuring compatibility</li>
						<li>ill-behaving browsers (such as IE) make it impossible to develop simple code</li>
						<li><link href="js-frameworks"/> provide <q>compatibility layers</q> on top of browsers</li>
					</ul>
					<li>Browsers can morph into <q>runtime environments</q></li>
					<ul>
						<li>using <a href="http://docs.google.com/">Google Docs</a> has little to do with Web browsing</li>
						<li>some essential features are missing (offline capabilities, local storage)</li>
						<li><link href="chrome"/> is Google's attempt to morph the Web into an application platform</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Content Type Handling</title>
			<slide>
				<title>Content Types</title>
				<ul>
					<li>Browsers retrieve resources and render them</li>
					<ol>
						<li>URI identifies a resource to be retrieved</li>
						<li>HTTP request is sent to the server requesting the resource</li>
						<li>HTTP response is received containing and describing the resource</li>
						<li>the <link href="mime">media type</link> determines how the browser handles the resource</li>
					</ol>
					<li>Browsers can handle resources in four different ways</li>
					<ol>
						<li><link href="built-in"/> allows the browser to handle the resource by itself</li>
						<li><link href="add-on"/> extend the browser with additional capabilities</li>
						<li><link href="plug-in"/> are platform-specific extensions</li>
						<li><link href="external-viewer"/> are programs to which the browser passes the resource</li>
						<li>for unknown media types most browsers prompt users to save them locally</li>
					</ol>
				</ul>
			</slide>
			<part id="built-in">
				<title>Built-In Support</title>
				<slide>
					<title>Built into the Browser</title>
					<ul>
						<li>The Web is built on few universal media types</li>
						<li>Variety on the Web is achieved through two major factors:</li>
						<ol>
							<li>the established media types are not application-specific</li>
							<li>advanced content can be based on <em>browser-based runtime environments</em></li>
						</ol>
						<li>Examples of built-in media types:</li>
						<ul>
							<li>HTML+CSS for rendering formatted Web pages</li>
							<li>popular <link href="image-formats"/> (GIF, JPEG, PNG, ICO)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Advantages/Disadvantages</title>
					<ul>
						<li>Advantages of built-in media types:</li>
						<ul>
							<li>fast and seamless rendering</li>
							<li>should be supported by any browser (safe choice for developers)</li>
						</ul>
						<li>Disadvantages of built-in media types:</li>
						<ul>
							<li>cannot be added by users (not extensible)</li>
							<li>browser must be upgraded to add new types</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="add-on">
				<title>Add-Ons</title>
				<slide>
					<title>Browser-Specific Additions</title>
					<ul>
						<li>Supported by the browser itself (built into the browser framework)</li>
						<li>Usually support <em>additional functionality</em> and not new media types</li>
						<li>Add-ons are productivity enhancements for users</li>
						<li>Examples of add-ons:</li>
						<ul>
							<li><a href="https://addons.mozilla.org/en-US/firefox/addon/5203">Minimap Sidebar</a> for better support for geolocation data</li>
							<li><a href="https://addons.mozilla.org/en-US/firefox/addon/4106">Operator</a> for visualizing a Web page's metadata</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Advantages/Disadvantages</title>
					<ul>
						<li>Advantages of add-ons:</li>
						<ul>
							<li>only browser-dependent (i.e., not OS-dependent)</li>
							<li>can be installed individually and specifically for users</li>
							<li>allow browser-specific behavior (e.g., <link href="firebug"/> for Firefox debugging)</li>
						</ul>
						<li>Disadvantages of add-ons:</li>
						<ul>
							<li>cannot be used across browsers</li>
							<li>conflicts between add-ons can lead to instabilities</li>
							<li>Web applications cannot count on them being available (e.g., <a href="http://gears.google.com/">Google Gears</a> and <link href="chrome"/>)</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="plug-in">
				<title>Plug-Ins</title>
				<slide>
					<title>Platform Code in the Browser</title>
					<ul>
						<li>Platform-specific code running inside the browser</li>
						<ul>
							<li>window created by an <htmel>object</htmel> with given window dimensions</li>
							<li>the window displays whatever the plug-in code generates as display</li>
						</ul>
						<li>Examples of plug-ins:</li>
						<ul>
							<li><a href="http://get.adobe.com/reader/">Adobe Reader</a> for rendering PDF documents</li>
							<li><a href="http://get.adobe.com/flashplayer/">Adobe Flash Player</a> for running Flash applications</li>
							<li><a href="http://java.com/en/download/help/5000011200.xml">Java Runtime Environment (JRE)</a> for executing Java Applets</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Advantages/Disadvantages</title>
					<ul>
						<li>Advantages of plug-ins:</li>
						<ul>
							<li>high performance (OS-specific code)</li>
							<li>reasonably easy to implement if OS-specific code already exists</li>
						</ul>
						<li>Disadvantages of plug-ins:</li>
						<ul>
							<li>hard to implement for a new OS (needs OS-specific code)</li>
							<li>no easy fallback if not supported by a browser</li>
							<li>plug-in internals are <q>invisible</q> to the browser</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="external-viewer">
				<title>External Viewers</title>
				<slide>
					<title>Browser and Applications</title>
					<ul>
						<li>Applications often are not integrated with a browser</li>
						<ul>
							<li>designed as standalone applications</li>
							<li>capable of handling certain media types</li>
							<li>registered with the OS and/or with a browser</li>
						</ul>
						<li>External applications can do anything they like with a resource</li>
						<ul>
							<li>the browser downloads the resource to a temporary file</li>
							<li>the external viewer is started and handed the file's location</li>
						</ul>
						<li>Examples of external viewers:</li>
						<ul>
							<li><a href="http://en.wikipedia.org/wiki/Office_suite">Office Suites</a> for handling documents, spreadsheets, and presentations</li>
							<li><a href="http://get.adobe.com/reader/">Adobe Reader</a> for rendering PDF documents</li>
							<li><a href="http://earth.google.com/">Google Earth</a> for handling <em>KML</em> files</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Advantages/Disadvantages</title>
					<ul>
						<li>Advantages of external viewers:</li>
						<ul>
							<li>almost no integration effort with a browser (just registering)</li>
							<li>sophisticated and highly optimized applications</li>
							<li>can be taken offline and used for non-Web activities</li>
						</ul>
						<li>Disadvantages of external viewers:</li>
						<ul>
							<li>high dependency on platform and configuration of a user's browser</li>
							<li>completely breaks the workflow of working with a browser</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Others</title>
				<slide id="greasemonkey">
					<title>Greasemonkey</title>
					<ul>
						<li>Greasemonkey is an <q><link href="add-on">add-on</link> for add-ons</q></li>
						<li>Runtime environment for <em href="http://userscripts.org/">user scripts</em></li>
						<ul>
							<li>specifically addressing a Web page or a Web site</li>
							<li>locally changing the Web page (in the browser)</li>
							<li>support for changing a Web site's display according to my user needs</li>
						</ul>
						<li>User scripts analyze/extract/update a Web page</li>
						<ul>
							<li>when Web pages are redesigned, user scripts often break</li>
							<li>a fragile way of repurposing the information from a Web site</li>
						</ul>
					</ul>
				</slide>
			</part>
        </part>
        <part>
			<title>Extended Browsers</title>
			<slide id="chrome">
				<title>Chrome</title>
				<img src="chrome-logo.png" style="float : right ; margin : 0 1em 2em 2em ; width : 20% ; " href="http://www.google.com/chrome/" title="Google Chrome"/>
				<ul>
					<li>Google's goal is to move more information online</li>
					<li>Today's browsers lack good offline support</li>
					<ul>
						<li><a href="http://gears.google.com/">Google Gears</a> <link href="add-on">add-on</link> adds offline capabilities for Firefox</li>
					</ul>
					<li><a href="http://www.google.com/chrome/">Chrome</a> is built around Google's strategic goals</li>
					<ul>
						<li>robust and high quality rendering based on <a href="http://webkit.org/">WebKit</a></li>
						<li>efficient and fast execution of scripting code with <a href="http://code.google.com/p/v8/">V8</a></li>
						<li>includes Google Gears for offline mode</li>
						<li>minimal user interface to look more like an application</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Accessibility Browsers</title>
				<ul>
					<li>Browsers for blind users depend on well-designed Web pages</li>
					<ul>
						<li>some browsers read aloud the textual information on a page</li>
						<li>Braille readers are an alternative to read-aloud browsers</li>
					</ul>
					<li>Only well-designed Web content is accessible to these browsers</li>
					<ul>
						<li>HTML and CSS content can be analyzed in the browser</li>
						<li><link href="plug-in"/> content (such as Flash) cannot be handled at all</li>
						<li><link href="external-viewer"/> may have their own accessibility features</li>
					</ul>
					<li>Usability should play an important role in Web design</li>
					<ul>
						<li><a href="http://www.section508.gov/">Section 508</a> requires public information to be made accessible</li>
					</ul>
				</ul>
			</slide>
        </part>
        <slide>
			<title>Conclusions</title>
			<ul>
				<li>Browsers are more than just <q>HTML viewers</q></li>
				<li>Users can customize their browsing experience</li>
				<li>Information providers should be aware of browser issues</li>
			</ul>
        </slide>
    </presentation>
    <presentation id="html">
        <title short="HTML">Hypertext Markup Language (HTML)</title>
        <date>2009-09-03</date>
        <toc class="reading"><a href="http://www.w3.org/MarkUp/Guide/">Getting started with HTML</a></toc>
        <toc class="resources"><a href="http://www.w3schools.com/html">HTML&#160;Tutorial</a>&#160;· <a href="https://developer.mozilla.org/en/HTML/Element">HTML&#160;Reference</a>&#160;· <a href="http://validator.w3.org/">HTML&#160;Validator</a></toc>
        <toc class="assignment"><a href="a/1/">A1</a>&#160;assigned (due&#160;date:&#160;9/20)</toc>
        <toc class="abstract">The <em>Hypertext Markup Language (HTML)</em> is the most important content type on the Web. This lecture covers a basic overview of how to use HTML markup in general. In particular, we look at page titles, meta tags, inserting text and images, using lists, and creating simple tables. Attributes can be used for more layout control in the HTML tags, but most layout issues are deferred until the CSS lecture.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
		<slide id="html-markup">
			<title>Structured Documents on the Web</title>
			<ul>
				<li><em>Hypertext Markup Language (HTML)</em> is the language of the Web</li>
				<ul>
					<li><em>Hypertext</em> because the Web is a hypermedia system</li>
					<li><em>Markup</em> because documents are encoded using text</li>
					<li><em>Language</em> because HTML is used for communications</li>
				</ul>
				<li><em>Markup Languages</em> are different from most file formats</li>
				<ul>
					<li>many computer formats are binary encoded and not <q>just text</q></li>
					<li><em>markup</em> allows structured documents to be encoded <em>as just text</em></li>
				</ul>
				<li>Web data formats use markup as well as other encodings</li>
				<ul>
					<li><em>HTML</em> and <em>XML</em> are markup languages</li>
					<li><em>JavaScript</em> is also exchanged textually (but it's not markup)</li>
					<li>images and other multimedia content are encoded as binary files</li>
				</ul>
			</ul>
		</slide>
		<part id="html-validation">
			<title>HTML Validation</title>
			<slide>
				<title>Checking for Correctness</title>
				<ul>
					<li>HTML's structure is defined by a <em>Document Type Definition (DTD)</em></li>
					<ul>
						<li>formally speaking, a DTD defines the grammar of the HTML language</li>
						<li>(and if you really want to know, <em>SGML</em> defines the syntax)</li>
						<li>colloquially speaking, a DTD defines how to combine elements and attributes</li>
					</ul>
				</ul>
				<listing src="html4-strict.dtd" line="513-517" title="Syntax for Unordered Lists (UL)"/>
				<listing src="html4-strict.dtd" line="521-524" title="Syntax for List Items (LI)"/>
				<listing src="html4-strict.dtd" line="258" title="Definition of %flow;"/>
				<listing src="html4-strict.dtd" line="254-256" title="Definition of %block;"/>
			</slide>
			<slide id="tool-validation">
				<title>Tool-Based Validation</title>
				<ul>
					<li>Testing HTML makes sure that it is well-defined</li>
					<ul>
						<li>any errors have to be corrected by the browser</li>
						<li>the results of browser-side corrections are hard to predict</li>
					</ul>
					<li>HTML editors allow validation within the tool</li>
					<ul>
						<li>in theory, using the <a href="http://www.w3.org/TR/html4/loose.dtd">public DTD</a>, in practice, using a <a href="src/html4-loose.dtd">local copy</a></li>
					</ul>
					<li>In theory, HTML editors should always produce valid HTML</li>
					<ul>
						<li>things today are not as bad as they used to be</li>
						<li>creating valid HTML can be a challenge for complex Web pages</li>
					</ul>
				</ul>
			</slide>
			<slide id="web-validation">
				<title>Web-Based Validation</title>
				<ul>
					<li><link href="tool-validation"/> requires locally installed tools</li>
					<ul>
						<li>may be hard to install and hard to maintain across computers</li>
						<li>for power users, locally installed tools are hard to beat</li>
					</ul>
					<li><em>Web-based tools</em> allow validation from anywhere</li>
					<li><a href="http://www.w3.org">W3C</a>'s <a href="http://validator.w3.org/">markup validation service</a> supports three modes:</li>
					<ol>
						<li>validation by URI (pointing at a random Web page)</li>
						<li>validation by file upload (allows validation of non-Web files)</li>
						<li>validation by copy/paste (lightweight mode for small experiments)</li>
					</ol>
					<li>Markup validation is only one facet of checking Web content</li>
					<ul>
						<li><a href="http://jigsaw.w3.org/css-validator/">checking CSS code for validity</a></li>
						<li><a href="http://validator.w3.org/mobile/">checking Web pages for mobile content (i.e., simpler HTML)</a></li>
						<li><a href="http://validator.w3.org/checklink">checking Web pages for broken links</a></li>
					</ul>
				</ul>
			</slide>
			<slide id="firebug">
				<title>Firebug</title>
				<img src="firebug-logo.png" style="float : right ; margin : 0 1em 2em 2em ; " href="http://getfirebug.com/" title="Firebug"/>
				<ul>
					<li>Browser-based inspection (much better than <q>view source</q>)</li>
					<li>Learning Web design by looking at Web design</li>
					<ul>
						<li>Firefox's <em>View → Page Source</em> provides access to a page's source</li>
						<li>Firefox's <em>Tools → Page Info</em> provides access to all ancillary files</li>
					</ul>
					<li>Understanding how complex HTML works is hard</li>
					<ul>
						<li>looking at the source requires <q>brain-based rendering</q></li>
						<li>looking at a rendered document makes it hard to see the source</li>
						<li>Firebug provides a convenient inspection feature for Web pages</li>
					</ul>
					<li>Inspection allows both directions of understanding HTML</li>
					<ul>
						<li>inspecting the rendered page and looking at the source part</li>
						<li>inspecting the source and seeing how it is being rendered</li>
					</ul>
					<li>Firebug also displays the associated <link href="css">CSS</link> code</li>
				</ul>
			</slide>
		</part>
		<slide>
			<title>HTML and WYSIWYG</title>
			<ul>
				<li>Thinking of HTML as a page-description language is wrong</li>
				<ul>
					<li>the world's worst Web page editor: <a href="http://geocities.yahoo.com/v/pb.html">Yahoo! PageBuilder</a></li>
				</ul>
				<li>HTML has been designed as a structure-description language</li>
				<ul>
					<li>structured contents can be reformatted and reflowed</li>
				</ul>
				<li>HTML rendering depends on many client properties</li>
				<ul>
					<li>screen/window size, resolution, and color depth</li>
					<li>different available fonts</li>
					<li>fonts with the same family name but different metrics</li>
					<li>different hyphenation algorithms</li>
					<li>hyphenation setting defaults</li>
					<li>hyphenation dictionaries</li>
					<li>different size spaces</li>
					<li>different line-breaking algorithms</li>
					<li>different widow/orphan/keeptogether rules</li>
				</ul>
			</ul>
		</slide>
        <slide>
			<title>Web Browsers</title>
			<img src="web-browser-usage.png" style="float : right ; width : 40% ; margin : 0 1em 2em 2em ; " href="http://en.wikipedia.org/wiki/Usage_share_of_web_browsers" title="Web Browser Usage"/>
			<p><span style="border:none;background-color:#0000ff;color:#0000ff;" class="aide" title="#0000ff">██</span>&#160;Internet Explorer (69.80%)</p>
			<p><span style="border:none;background-color:#ff9900;color:#ff9900;" class="aide" title="#ff9900">██</span>&#160;Mozilla Firefox (20.66%)</p>
			<p><span style="border:none;background-color:#cccccc;color:#cccccc;" class="aide" title="#cccccc">██</span>&#160;Safari (7.18%)</p>
			<p><span style="border:none;background-color:#ffff00;color:#ffff00;" class="aide" title="#ffff00">██</span>&#160;Chrome (0.87%)</p>
			<p><span style="border:none;background-color:#ff0000;color:#ff0000;" class="aide" title="#ff0000">██</span>&#160;Opera (0.72%)</p>
			<p><span style="border:none;background-color:#66cc33;color:#66cc33;" class="aide" title="#66cc33">██</span>&#160;Netscape (0.52%)</p>
			<p><span style="border:none;background-color:#00ffff;color:#00ffff;" class="aide" title="#00ffff">██</span>&#160;Other (0.25%)</p>
        </slide>
        <part>
			<title>HTML and Structure</title>
			<slide>
				<title>Text</title>
				<ul>
					<li><htmel>h1</htmel>-<htmel>h6</htmel> are different levels of <a href="http://www.w3.org/TR/REC-html40/struct/global.html#h-7.5.5">headings</a></li>
					<li><htmel>p</htmel> contains <a href="http://www.w3.org/TR/REC-html40/struct/text.html#h-9.3.1">paragraph text</a></li>
					<ul>
						<li>whitespace and line wrapping are ignored</li>
						<li>paragraphs are set as boxes containing a number of lines</li>
					</ul>
					<li>Text inside paragraphs can use additional markup (<q href="http://www.w3.org/TR/REC-html40/struct/text.html#h-9.2.1">phrase markup</q>)</li>
					<ul>
						<li><htmel>em</htmel> for <em>emphasized text</em></li>
						<li><htmel>strong</htmel> for text with a <strong>strong emphasis</strong></li>
						<li><htmel>sub</htmel> for <sub>subscript text</sub></li>
						<li><htmel>sup</htmel> for <sup>superscript text</sup></li>
						<li><htmel>q</htmel> for <q>quoted text</q> (<q>try <q>nesting</q> quotes</q>)</li>
						<li><htmel>code</htmel> for <code>code examples</code></li>
					</ul>
					<li>Rendering of all these elements is built into the browser</li>
					<ul>
						<li>more sophisticated issues probably <a href="http://dret.typepad.com/dretblog/2008/04/internationaliz.html">are more browser-dependent</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>More Advanced Text</title>
				<ul>
					<li>Quotations can be explicitly marked up as such</li>
					<ul>
						<li><htmel>blockquote</htmel> for block-level quotations</li>
						<li><htmel>q</htmel> for inline quotations (part of a block)</li>
						<li><htmla>cite</htmla> provides support for <q cite="http://www.w3.org/TR/REC-html40/struct/text.html#edef-CITE">pointing to the source</q></li>
					</ul>
					<li>Preformatted text allows text formatting in the HTML source</li>
					<ul>
						<li><htmel>pre</htmel> leaves whitespace intact and usually uses monospaced fonts</li>
						<li>word wrapping may be turned off by default</li>
					</ul>
					<listing src="pre.html" line="11-17" title="Preformatted text"/>
				</ul>
			</slide>
			<slide>
				<title>Lists</title>
				<ul>
					<li>HTML supports three kinds of lists</li>
					<ul>
						<li><htmel>ul</htmel> for <em href="http://www.w3.org/TR/REC-html40/struct/lists.html#h-10.2">unordered lists</em> containing <htmel title="list item">li</htmel></li>
						<li><htmel>ol</htmel> for <em href="http://www.w3.org/TR/REC-html40/struct/lists.html#h-10.2">ordered lists</em> containing <htmel title="list item">li</htmel></li>
						<li><htmel>dl</htmel> for <em href="http://www.w3.org/TR/REC-html40/struct/lists.html#h-10.3">definition lists</em> containing <htmel title="definition term">dt</htmel>/<htmel title="definition description">dd</htmel></li>
					</ul>
				</ul>
				<listing src="lists.html" line="7-26" title="HTML List Types"/>
			</slide>
			<slide>
				<title>Tables</title>
				<ul>
					<li>Tables are the most complex visual structure in HTML</li>
					<ul>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2.1">table</htmel> represents a table as a sequence of rows</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2.5">tr</htmel> represents a <em>table row</em> as a sequence of cells</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2.6">td</htmel> represents a table cell containing <em>table data</em></li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2.6">th</htmel> is a special cell containing <em>header data</em></li>
					</ul>
				</ul>
				<listing src="tables.html" line="10-23" title="HTML Table with Rows, Cells, and Header Cells"/>
			</slide>
			<slide>
				<title>Merging Table Cells</title>
				<img src="mergedcells.gif" style="width : 90% ; margin : 2em ; " href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.1" title="HTML Table with Merged Cells"/>
			</slide>
			<slide>
				<title>Merging Rows</title>
				<ul>
					<li>Table cells can span more than a single table row</li>
					<ul>
						<li><htmla>rowspan</htmla> specifies how many rows a cell is spanning</li>
						<li>subsequent table rows must leave that space <q>empty</q></li>
					</ul>
				</ul>
				<listing src="tables.html" line="25-37" title="Merging HTML Table Rows"/>
			</slide>
			<slide>
				<title>Merging Columns</title>
				<ul>
					<li>Table cells can span more than a single table column</li>
					<ul>
						<li><htmla>colspan</htmla> specifies how many columns a cell is spanning</li>
						<li>the following cells in the same row must be left <q>empty</q></li>
					</ul>
				</ul>
				<listing src="tables.html" line="39-53" title="Merging HTML Table Columns"/>
			</slide>
			<slide>
				<title>Images</title>
				<ul>
					<li>The Web is an open hypermedia system</li>
					<ul>
						<li><em>hyper</em> refers to the term <q>hypertext</q> for linked content</li>
						<li><em>media</em> refers to the fact that multiple media types are supported</li>
					</ul>
					<li>For a long time, the Web only supported text and images</li>
					<ul>
						<li>images can be used in a variety of formats (GIF, JPEG, PNG)</li>
						<li>audio and video are possible today, but not <q>part of the Web</q></li>
					</ul>
					<li>Images are not part of a Web page, they are included by markup</li>
					<ul>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/objects.html#h-13.2">img</htmel> is an empty element for including images</li>
						<li><htmla>src</htmla> is a URI pointing to the image (often a relative URI)</li>
					</ul>
				</ul>
				<pre>&lt;img src="../img/portrait.png" alt="Portrait"></pre>
			</slide>
			<slide>
				<title>Image Details</title>
				<ul>
					<li>Images must use a format supported by the browser</li>
					<ul>
						<li>GIF, JPEG, and PNG are pretty safe choices</li>
						<li>HTTP allows the browser to understand the image format</li>
						<li>limited browsers might have size/complexity restrictions</li>
					</ul>
					<li>Image information increases accessibility of a page</li>
					<ul>
						<li><htmla>alt</htmla> contains a short description of the image</li>
						<li>for icons it is essential to provide this information</li>
					</ul>
					<li>Image dimensions and image rendering</li>
					<ul>
						<li><htmla>width</htmla>/<htmla>height</htmla> specify the dimensions of the image</li>
						<li>allows the browser to start rendering the page before the images are received</li>
						<li>will be used to resize the image if the real image size is different</li>
						<li>browser-based scaling of images <a href="http://offog.org/articles/image-scaling/">often is not a good idea</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Links</title>
				<ul>
					<li>Links are the most important feature of the Web</li>
					<ul>
						<li>conceptually, the Web is one large hypermedia document</li>
						<li>links are based on Web identifiers, the <em>Uniform Resource Identifier (URI)</em></li>
					</ul>
					<li><htmel>a</htmel> is a link <em>anchor</em> and links to a URI (the <em>link target</em>)</li>
					<pre>&lt;a href="http://www.berkeley.edu" title="UC Berkeley">UCB&lt;/a></pre>
					<li>URIs can have various forms</li>
					<ul>
						<li><code>http:</code> points to resources available on Web servers</li>
						<li><code>https:</code> is the same but uses encrypted connections</li>
						<li>URIs can use a variety of other <link href="uri-schemes"/></li>
						<li>URIs can be relative (in the same way as file names)</li>
						<li>relative URIs are evaluated relative to the URI of their occurrence</li>
						<li>relative URIs can use path segments such as <q><code>/</code></q> and <q><code>..</code></q></li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>HTML Matters</title>
				<ul>
					<li>HTML is not just getting text displayed</li>
					<li>Good HTML allows better browsing</li>
					<li>First represent as much as possible in HTML</li>
					<li>Then add what is missing as <link href="css">CSS</link> and/or microformats</li>
					<li>Graceful degradation is important</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="html-advanced">
        <title>Advanced HTML</title>
        <date>2009-09-08</date>
        <toc class="reading"><a href="http://www.w3.org/MarkUp/Guide/Advanced.html">Advanced HTML</a></toc>
        <toc class="resources"><a href="http://www.maschek.hu/imagemap/imgmap">Online Image Map Editor</a></toc>
        <toc class="abstract">This lecture covers linking in general and in header information, and a more general view of HTML layout based on the box model used by browsers. The concept of <em>frames</em> is introduced, which can be used in a combination of framesets and pages, or as inline frames. Finally, <em>image maps</em> are introduced as a way of turning images not only into links, but into a set of various linked areas overlaid on the image.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
        <part>
			<slide>
				<title>Other Links</title>
				<ul>
					<li>Links using <htmel>a</htmel> are the most important links on the Web</li>
					<ul>
						<li><htmla>href</htmla> points to the link target</li>
						<li>most of the time, the link anchor is text or an image</li>
					</ul>
					<li>HTML has many more elements linking to other resources</li>
					<ul>
						<li><a href="http://www.w3.org/TR/REC-html40/struct/text.html#h-9.2.2"><htmel>q</htmel>/<htmel>blockquote</htmel></a> point to the source of quotations</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/objects.html#h-13.2">img</htmel> specifies an image and embeds this image into the page</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/interact/forms.html#h-17.3">form</htmel> points to a URI to which the contents of a form are submitted</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/objects.html#h-13.3">object</htmel> embeds an object in a web page (such as a Flash app)</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.2.2">frame</htmel> loads a Web page into a frame</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.5">iframe</htmel> embeds a Web page into a Web page</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/struct/links.html#h-12.3">link</htmel> connects a page to ancillary resources</li>
						<li><htmel href="http://www.w3.org/TR/REC-html40/interact/scripts.html#h-18.2.1">script</htmel> specifies the location of scripting code</li>
						<li>this list is not complete (but close)</li>
					</ul>
				</ul>
			</slide>
			<title>Header Information</title>
			<slide>
				<title>HTML Document Structure</title>
				<img style="height : 65% ; margin : 4% ; " src="html-head-body.png" title="HTML Document Structure"/>
			</slide>
			<slide>
				<title>HTML Document Type</title>
				<listing src="html-syntax.html" line="1" title="HTML Document Type Declaration"/>
				<ul>
					<li>HTML pages have to declare their document format</li>
					<ul>
						<li>browsers/clients should know which version of HTML they are dealing with</li>
						<li>HTML uses the same element names in different versions</li>
					</ul>
					<li>A <em href="http://www.w3.org/TR/REC-html40/struct/global.html#h-7.2">Document Type Declaration</em> officially declares the document type</li>
					<li>HTML has three different document types:</li>
					<ol>
						<li><em>Transitional</em> for backwards-compatibility</li>
						<li><em>Strict</em> for HTML as it should be (more restricted than <q>Transitional</q>)</li>
						<li><em>Frameset</em> for using <link href="frames"/></li>
					</ol>
					<li>The <em>public identifier</em> specifies <a href="http://www.w3.org/TR/REC-html40/struct/global.html#h-7.2">HTML version information</a></li>
					<pre>&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"></pre>
					<pre>&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"></pre>
					<pre>&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"></pre>
				</ul>
			</slide>
			<slide id="html-metadata">
				<title>Document Metadata</title>
				<ul>
					<li>All document content is specified in the HTML <htmel>body</htmel></li>
					<ul>
						<li>this is what a browser renders in the document window</li>
						<li>rendering may need additional information such as scripts and style</li>
					</ul>
					<li>Information <em>about</em> the document is contained in its <htmel href="http://www.w3.org/TR/html4/struct/global.html#h-7.4.1">head</htmel></li>
				</ul>
				<listing src="html-syntax.html" line="1-9" title="HTML Document Head"/>
			</slide>
			<slide>
				<title>Essential Metadata</title>
				<ul>
					<li>Page <htmel href="http://www.w3.org/TR/html4/struct/global.html#h-7.4.2">title</htmel>s are used in various places</li>
					<ul>
						<li>in the title bar of the browser window and/or the browser tab</li>
						<li>when creating bookmarks</li>
						<li>in the results of search engines</li>
					</ul>
					<li>Style information can be embedded in <htmel href="http://www.w3.org/TR/html4/present/styles.html#edef-STYLE">style</htmel> (not reusable)</li>
					<li>HTML pages can <htmel href="http://www.w3.org/TR/html4/struct/links.html#edef-LINK">link</htmel> to external resources</li>
					<ul>
						<li>a number of <em href="http://www.w3.org/TR/html4/types.html#type-links">link types</em> define relationships</li>
						<li>some relationships are in widespread use but are not standardized (e.g., <q><code>icon</code></q>)</li>
						<li>one possible link type is <q><code>stylesheet</code></q> for pointing to external styles</li>
					</ul>
					<li>External styles need three pieces of information</li>
					<ul>
						<li><htmla>href</htmla> specifies the URI of the external stylesheet</li>
						<li><htmla>rel</htmla> specifies the link type <q><code>stylesheet</code></q></li>
						<li><htmla>type</htmla> specifies the type of the stylesheet as a media type (<code>text/css</code>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Additional Metadata</title>
				<ul>
					<li><htmel href="http://www.w3.org/TR/html4/struct/links.html#edef-BASE">base</htmel> sets the base URI for all relative URIs</li>
					<ul>
						<li>can be useful if the page contains many references to a different site</li>
						<pre title="Setting Base URI in the Document Head">&lt;base href="http://en.wikipedia.org/wiki/"></pre>
						<pre title="Using Base URI in the Document Body" href="http://en.wikipedia.org/wiki/HTML">&lt;a href="HTML" title="Wikipedia: HTML">HTML&lt;/a></pre>
					</ul>
					<li><htmel href="http://www.w3.org/TR/html4/struct/global.html#h-7.4.4.2">meta</htmel> specifies general metadata for a page</li>
					<ul>
						<li><html>keywords</html> and <html>description</html> from the early days of search engines, largely ignored these days</li>
						<li>additional metadata schemes have been defined (<em href="http://dublincore.org/documents/dcq-html/">Dublin Core</em>)</li>
						<li>metadata is a wide field and depends on usage and users</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Creating Content</title>
			<slide id="html-containers">
				<title>All-Purpose Elements</title>
				<ul>
					<li>HTML elements are supposed to convey structural semantics</li>
					<ul>
						<li><a href="http://www.w3.org/TR/html4/struct/lists.html">lists</a> are available in various flavors (<htmel>ul</htmel>, <htmel>ol</htmel>, <htmel>dl</htmel>)</li>
						<li>various <a href="http://www.w3.org/TR/html4/struct/text.html#h-9.2.1">phrase markup elements</a> are available (<htmel>em</htmel>, <htmel>strong</htmel>, <htmel>dfn</htmel>, <htmel>code</htmel>, <htmel>samp</htmel>, <htmel>kbd</htmel>, <htmel>var</htmel>, <htmel>cite</htmel>, <htmel>abbr</htmel>, <htmel>acronym</htmel>)</li>
						<li>various <a href="http://www.w3.org/TR/html4/struct/global.html#h-7.5.5">levels of headings</a> can be used (<htmel>h1</htmel>-<htmel>h6</htmel>)</li>
					</ul>
					<li>HTML content should represent structural information</li>
					<ul>
						<li>not all content can be mapped to HTML elements</li>
						<li>in many cases HTML elements are available (there are even <a href="http://www.w3.org/TR/html4/struct/text.html#h-9.4">diff elements</a>)</li>
					</ul>
					<li>HTML also has <a href="http://www.w3.org/TR/html4/struct/global.html#h-7.5.4">all-purpose elements</a></li>
					<ul>
						<li>these elements have no semantics and are just containers</li>
						<li><htmel>span</htmel> is used as an inline container</li>
						<li><htmel>div</htmel> is used as a block container</li>
						<li>all-purpose elements should only be used if no HTML element is available</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Retain Content Structures</title>
				<ul>
					<li>HTML should represent content structures</li>
					<ul>
						<li><link href="css">CSS</link> can be used to tweak the formatting (if required)</li>
					</ul>
					<li>Rich content should be mapped to rich Web pages</li>
					<ul>
						<li>use HTML elements if available</li>
						<li><link href="css-classes">augment HTML elements with CSS classes</link> for more specific semantics</li>
						<li>use <link href="microformats"/> for capturing more semantics</li>
					</ul>
					<li>HTML is just one possible representation of a resource</li>
					<ul>
						<li>the data model of resources should not be limited by HTML</li>
						<li>richer representations may become available in the future</li>
						<li>why are there so few Web pages with <uri>tel:</uri> URIs?</li>
					</ul>
				</ul>
			</slide>
        </part>
		<part id="box-model">
			<title short="Box Model">HTML/CSS Box Model</title>
			<slide>
				<title>Structure and Layout</title>
				<img style="height : 65% ; margin : 4% ; " src="box-intro.png" href="http://www.w3.org/TR/css3-box/#introduction"/>
			</slide>
			<slide>
				<title>Box Structure</title>
				<img style="height : 65% ; margin : 4% ; " src="box.png" href="http://www.w3.org/TR/css3-box/#introduction"/>
			</slide>
			<slide>
				<title>Floating Boxes Layout</title>
				<img style="height : 65% ; margin : 4% ; " src="float-box.png" href="http://www.w3.org/TR/css3-box/#introduction0"/>
			</slide>
			<slide>
				<title>Floating Boxes Markup</title>
				<listing src="float-box.html"/>
			</slide>
		</part>
        <part id="frames">
			<title>Frames</title>
			<slide>
				<title>Combining Documents in the Browser</title>
				<ul>
					<li>HTML pages usually are one document loaded by the browser</li>
					<li>Frames were created to be able to combine documents</li>
					<li>Frames were created when server-side frameworks were primitive</li>
					<ul>
						<li>building site navigation with frames is rather simple (HTML only)</li>
						<li>building site navigation without frames is harder (server support required)</li>
					</ul>
					<li>More modern applications combine content differently</li>
					<ul>
						<li>assembled on the server side and delivered as one document</li>
						<li>assembled in the browser via <link href="scripting"/> and one logical document</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Problems with Frames</title>
				<ul>
					<li>Frame-based sites are hard to use from the Web point of view</li>
					<ul>
						<li>it is hard or impossible to link to pages</li>
						<li>users have a hard time creating bookmarks</li>
					</ul>
					<li>Search engines have problems pointing users to results</li>
					<ul>
						<li>pointing to the frameset leads to a page that might not even contain the result</li>
						<li>pointing to individual pages can result in unusable pages</li>
					</ul>
					<li>Printing frame-based pages usually does not work very well</li>
					<ul>
						<li>most browsers support print functionality for one frame at a time</li>
					</ul>
					<li>Frames are not considered good practice anymore</li>
					<ul>
						<li>they can still be useful for internal or limited audiences</li>
						<li>they can be useful for rapid prototyping</li>
						<li>pages for a general Web audience should not use frames</li>
					</ul>
				</ul>
			</slide>
			<part id="html-frames">
				<title>Regular Frames</title>
				<slide>
					<title>Framesets and Frames</title>
					<ul>
						<li>HTML pages can be HTML content or <a href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.1">framesets</a></li>
						<li>For framesets, the page <em>only</em> defines a frameset <q>skeleton</q></li>
						<ul>
							<li>the <htmel href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.2.1">frameset</htmel> describes the structure of the page</li>
							<li>individual <htmel href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.2.2">frame</htmel>s point to actual HTML content</li>
						</ul>
						<li>The browser retrieves the frameset and all frame contents</li>
						<ul>
							<li>rendering a frameset results in a <em>compound document</em></li>
						</ul>
						<li>Links in the frameset can load content into individual frames</li>
						<ul>
							<li>a <htmel>frame</htmel>'s <htmla>name</htmla> identifies a frame with a name</li>
							<li><htmel>a</htmel>'s <htmla>target</htmla> instructs the browser to load content into that frame</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Frameset and Frame Content</title>
					<listing src="lectures.html" line="2-10"/>
					<listing src="lectures-toc.html" line="2-15"/>
				</slide>
			</part>
			<part id="iframes">
				<title>IFrames</title>
				<slide>
					<title>Embedding HTML into HTML</title>
					<ul>
						<li><em href="http://www.w3.org/TR/REC-html40/present/frames.html#h-16.5">Inline frames</em> embed HTML pages in HTML pages</li>
						<ul>
							<li><link href="html-frames"/> segment the window and then load HTML into the parts</li>
							<li><htmel>iframe</htmel> is a box somewhere in an HTML page and contains an HTML page</li>
						</ul>
						<li>IFrames have the same usability/accessibility issues as frames</li>
						<ul>
							<li>printing is a problem (scrolled content in scrolled content)</li>
							<li>navigation is a problem (complex navigation in the context of one page)</li>
							<li>addressing (bookmarks/search) is a problem</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>IFrame Example</title>
					<listing src="iframes.html" title="IFrames for the Lecture Slides"/>
				</slide>
			</part>
        </part>
        <part id="image-map">
			<title>Image Maps</title>
			<slide>
				<title>Clickable Images</title>
				<ul>
					<li>Links on Web pages are often text or images</li>
					<ul>
						<li>almost anything can be turned into a link by wrapping it in <htmel>a</htmel></li>
						<li>if an image turns into a link, any part of the image can be clicked</li>
					</ul>
					<li>Images may convey structural information</li>
					<ul>
						<li>an organizational chart of a company</li>
						<li>a world map with countries or regions</li>
						<li>photographs with marked parts (buildings, persons, …)</li>
					</ul>
					<li><em href="">HTML image maps</em> turn an image into a structured link</li>
					<ul>
						<li>the older <link href="server-side-image-map"/> are simpler for the browser</li>
						<li>the newer <link href="client-side-image-map"/> are much more user-friendly</li>
					</ul>
				</ul>
			</slide>
			<slide id="server-side-image-map">
				<title>Server-Side Image Maps</title>
				<pre>&lt;a href="server-side-program">&lt;img src="grid.png" ismap="ismap"/>&lt;/a></pre>
				<a href="server-side-program"><img style="margin : 2em " src="grid.png" ismap="ismap"/></a>
			</slide>
			<slide id="client-side-image-map">
				<title>Client-Side Image Maps</title>
				<img src="grid.png" style="float : left ; margin : 2em ; " usemap="#gridmap"/>
				<map name="gridmap">
					<a title="Square 2/2" href="square22" shape="rect" coords="60,60,96,96"/>
					<a title="Square 3/6" href="square36" shape="rect" coords="106,244,142,282"/>
				</map>
				<pre>&lt;img src="grid.png" usemap="#gridmap">
&lt;map name="gridmap">
	&lt;a title="Square 2/2" href="square22" shape="rect" coords="60,60,96,96"/>
	&lt;a title="Square 3/6" href="square36" shape="rect" coords="106,244,142,282"/>
&lt;/map>
				</pre>
				<ul>
					<li>Various shapes are supported</li>
					<ul>
						<li><html>rect</html> for rectangles (x<sub>1</sub>,y<sub>1</sub>,x<sub>2</sub>,y<sub>2</sub>)</li>
						<li><html>circle</html> for circles (x,y,radius)</li>
						<li><html>poly</html> for polygons (x<sub>1</sub>,y<sub>1</sub>,…,x<sub>n</sub>,y<sub>n</sub>)</li>
					</ul>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="css">
        <title short="CSS">Cascading Style Sheets (CSS)</title>
        <date>2009-09-15</date>
        <toc class="reading"><a href="http://www.w3.org/MarkUp/Guide/Style">Adding a Touch of Style</a></toc>
        <toc class="resources"><a href="http://www.w3.org/TR/CSS21/" title="W3C CSS 2.1 Specification">CSS Spec</a>&#160;· <a href="http://www.w3.org/TR/CSS21/propidx.html" title="W3C CSS 2.1: Property Index">Properties</a>&#160;· <a href="http://www.w3schools.com/css">CSS&#160;Tutorial</a>&#160;· <a href="http://jigsaw.w3.org/css-validator/">CSS&#160;Validator</a></toc>
        <toc class="abstract"><em>Cascading Stylesheets (CSS)</em> have been designed as a language for better separating presentation-specific issues from the structuring of documents as provided by HTML. CSS uses a simple model of <em>selectors</em> and <em>declarations</em>. Selectors specify to which elements of a document a set of declarations (each being a value assigned to a property) apply; in addition there is a model of how property values are inherited and cascaded. The biggest limitation of CSS is that it cannot change the structure of the displayed document.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
		<part>
			<title>Why CSS?</title>
			<slide>
				<title>Structure vs. Layout</title>
				<ul>
					<li>HTML started as very simple layout-oriented structures</li>
					<ul>
						<li>more layout control was introduced as attributes (<xml>align</xml>, <xml>color</xml>)</li>
						<li>HTML became increasingly <q>polluted</q> by layout information</li>
					</ul>
					<li>CSS was introduced as a format for layout information</li>
					<ul>
						<li>the HTML can be kept simple, containing only the structures</li>
						<li>layout information can be reused by using separate CSS files</li>
					</ul>
					<li>CSS has been designed for HTML</li>
					<ul>
						<li>it has been generalized to also cover XML</li>
						<li>the HTML heritage is still very visible in CSS</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>What's Still Missing?</title>
				<ol>
					<li>Restructuring content</li>
					<ul>
						<li>CSS assigns formatting properties to elements</li>
						<li>the document tree which is formatted cannot be restructured</li>
						<li>parts can be <em>ignored</em> or new parts can be <em>inserted</em></li>
					</ul>
					<li>Interpreting content</li>
					<ul>
						<li><htmel>img</htmel> has a lot of special meanings attached</li>
						<li>form elements have special semantics (such as creating input fields)</li>
					</ul>
				</ol>
				<ul>
					<li><link href="scripting"/> can be used for implementing some of these issues</li>
					<li>CSS is simply for more sophisticated styling of HTML pages</li>
				</ul>
			</slide>
		</part>
		<part>
			<title>How CSS Works</title>
			<slide>
				<title>CSS in Action</title>
				<listing src="zengarden.html" line="17-30" href="http://www.csszengarden.com/"/>
			</slide>
			<slide>
				<title>HTML and CSS</title>
				<ul>
					<li>CSS specifies how HTML elements are formatted</li>
					<ol>
						<li>formatting can be attached to every element (redundant inside document)</li>
						<li>formatting can be included in document (redundant across documents)</li>
						<li>separate CSS files describe the formatting (best reuse)</li>
					</ol>
					<li>Any combination of these methods is possible</li>
				</ul>
				<listing src="css-usage.html" line="3-13"/>
			</slide>
			<slide>
				<title>Formatting Model</title>
				<ul>
					<li><link href="css-properties"/> are central to the CSS formatting model</li>
					<ol>
						<li>create a document tree</li>
						<li>identify the media type (e.g., <css>screen</css> or <css>print</css>)</li>
						<li>retrieve all stylesheets required for the media type</li>
						<li>assign values to all properties in the document tree</li>
						<li>generate a <em>formatting structure</em> (a different tree)</li>
						<li>render the formatting structure on the target medium</li>
					</ol>
					<li>Properties control the rendering of elements</li>
					<li>Styling in CSS means assigning values to properties</li>
				</ul>
			</slide>
		</part>
		<part id="css-strategies">
			<title>CSS Strategies</title>
			<slide id="css-classes">
				<title>Use Classes &amp; Containers</title>
				<ul>
					<li>CSS code should never show up in your HTML</li>
					<li>Classes should reflect content or formatting</li>
					<li><link href="html-containers">Containers</link> can restrict styles to a context</li>
					<li>Context can be nested</li>
					<ul>
						<li>orthogonal concepts should be represented as nested classes</li>
						<li>for example, pages for <q>staff</q> and <q>faculty</q> and <q>current</q> and <q>past</q> as classification</li>
						<li>different levels of formatting sophistication can be implemented with CSS only</li>
					</ul>
					<li>Avoid redundant CSS code</li>
					<ul>
						<li>whenever appropriate, <css>inherit</css> properties</li>
						<li>for invisible links use <css>a { color : inherit ; }</css></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="css-properties">
			<title>Properties</title>
			<slide>
				<title>Formatting Instructions</title>
				<ul>
					<li>Properties define how elements are formatted</li>
					<ul>
						<li>they define a specific facet of formatting</li>
						<li>they may have interdependencies with other properties</li>
						<li>they can be assigned explicitly</li>
						<li>they may be defined through <link href="css-cascading"/> or <link href="css-inheritance"/></li>
					</ul>
					<li>A property has a name and is used in a name/value-pair</li>
					<ul>
						<li>the name identifies the property that is being set</li>
						<li>the value space depends on the property</li>
						<li>some properties accept complex values</li>
						<li>sets of values: <css>p { font : bold italic large Palatino }</css></li>
						<li>sequences of values: <css>p { font-family : "Segoe UI", verdana, helvetica, arial, sans-serif }</css></li>
					</ul>
					<li>Property specifications can be grouped</li>
					<ul>
						<li><css>.thinboxed { border-width : 1px ; padding : 10px ; margin : 5px }</css></li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>CSS1 Properties</title>
				<slide>
					<title>Factoring out HTML</title>
					<ul>
						<li>CSS1 was published in <a href="http://www.w3.org/TR/REC-CSS1-961217">1996</a> and revised in <a href="http://www.w3.org/TR/1999/REC-CSS1-19990111">1999</a></li>
						<li>HTML suffered from too many attributes</li>
						<ul>
							<li>layout information was specified as CSS</li>
							<li>style attributes in HTML were marked as <q>deprecated</q></li>
						</ul>
						<li>A small set of formatting features as CSS properties</li>
						<ul>
							<li><a href="http://www.w3.org/TR/REC-CSS1/#font-properties">font</a>: <css>p { font : 80% sans-serif }</css></li>
							<li><a href="http://www.w3.org/TR/REC-CSS1/#color-and-background-properties">color and background</a>: <css>body { background : url(logo.jpeg) right top }</css></li>
							<li><a href="http://www.w3.org/TR/REC-CSS1/#text-properties">text</a>: <css>h1 { text-transform : uppercase }</css></li>
							<li><a href="http://www.w3.org/TR/REC-CSS1/#box-properties">box</a>: <css>p.quote { border-style : solid dotted }</css></li>
							<li><a href="http://www.w3.org/TR/REC-CSS1/#classification-properties">classification</a>: <css>img { display : none }</css></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS2 Properties</title>
				<slide>
					<title>CSS2</title>
					<ul>
						<li>CSS2 was published in <a href="http://www.w3.org/TR/1998/REC-CSS2-19980512/">1998</a> and is <a href="http://www.w3.org/TR/CSS21/">still being revised (CSS2<sup>1</sup>)</a></li>
						<li>CSS2<sup>1</sup> is what you can expect from modern browsers</li>
						<ul>
							<li>with IE (even IE7) being the exception</li>
						</ul>
						<li>CSS2 is a single and coherent specification</li>
						<ul>
							<li><a href="http://www.w3.org/TR/css3-roadmap/">CSS3</a> is a jungle of concurrent module development</li>
							<li>CSS3 will never be finished (some modules will, though)</li>
						</ul>
					</ul>
				</slide>
				<slide id="generated-content">
					<title>Generated Content</title>
					<ul>
						<li>CSS1 had no way of adding information to the document</li>
						<ul>
							<li>by using <css>display</css>, parts of the document could be ignored</li>
						</ul>
						<li><em href="http://www.w3.org/TR/CSS21/generate.html">Generated content</em> allows content to come from the CSS</li>
						<ul>
							<li>only possible with <css>:before</css> and <css>:after</css> <em>pseudo-elements</em></li>
							<li>static strings: <css>p.abstract:before { content : "Abstract: " }</css></li>
							<li>special effects like <q>quotes</q>: <css>q:before { content : open-quote } </css></li>
							<li>counters: <css>h1:before { content: "Chapter " counter(chapter) ". " ; counter-increment : chapter }</css></li>
						</ul>
						<li>Quotes can be defined as being language dependent</li>
						<ul>
							<li><css>q:lang(en) { quotes : '"' '"' "'" "'" }</css></li>
							<li><css>q:lang(no) { quotes : "«" "»" '"' '"' }</css></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Tables</title>
					<ul>
						<li>CSS1 does not address table formatting</li>
						<ul>
							<li>table layout still had to be done using HTML attributes</li>
							<li>a lot of redundant code specifying cell alignment and borders</li>
						</ul>
						<li>CSS2 introduces tables on the CSS level</li>
					</ul>
					<pre>table    { display: table }
tr       { display: table-row }
thead    { display: table-header-group }
tbody    { display: table-row-group }
tfoot    { display: table-footer-group }
col      { display: table-column }
colgroup { display: table-column-group }
td, th   { display: table-cell }
caption  { display: table-caption }</pre>
				</slide>
				<slide>
					<title>Fixed vs. Automatic Table Layout</title>
					<ul>
						<li>HTML defines a complex table rendering algorithm</li>
						<ul>
							<li>tables are rendered incrementally</li>
							<li>table layout is determined by looking at the complete table</li>
						</ul>
					</ul>
					<table width="90%" cellspacing="10%">
						<thead>
							<tr>
								<th>Automatic</th>
								<th>Fixed</th>
							</tr>
						</thead>
						<tr>
							<td width="45%">
								<table border="1">
									<tr>
										<td>col 1 row 1</td>
										<td>col 2 row 1 col 2 row 1</td>
										<td>col 3 row 1 col 3 row 1 col 3 row 1</td>
									</tr>
									<tr>
										<td>col 1 row 2</td>
										<td>col 2 row 2 col 2 row 2</td>
										<td>col 3 row 2 col 3 row 2 col 3 row 2</td>
									</tr>
								</table>
							</td>
							<td width="45%">
								<table border="1" style="table-layout : fixed ; ">
									<tr>
										<td width="33%">col 1 row 1</td>
										<td width="33%">col 2 row 1 col 2 row 1</td>
										<td width="33%">col 3 row 1 col 3 row 1 col 3 row 1</td>
									</tr>
									<tr>
										<td>col 1 row 2</td>
										<td>col 2 row 2 col 2 row 2</td>
										<td>col 3 row 2 col 3 row 2 col 3 row 2</td>
									</tr>
								</table>
							</td>
						</tr>
					</table>
				</slide>
			</part>
		</part>
		<part id="css-selectors">
			<title>Selectors</title>
			<slide>
				<title>Select and Style</title>
				<ul>
					<li><link href="css-properties"/> are applied to elements</li>
					<ul>
						<li>properties can be directly applied in an element's <htmla>style</htmla> attribute</li>
						<li>in all other cases, <em>selectors</em> are used to select the styled elements</li>
					</ul>
					<li>Selectors are good for reusable CSS code</li>
					<ul>
						<li>identifying the most appropriate formatting classes is not easy</li>
						<li>planning for CSS for a larger site is a difficult task</li>
					</ul>
					<li>CSS project management should separate selectors and properties</li>
					<ol>
						<li>selectors are about which things should be identified and styled</li>
						<li>properties are about how this styling is implemented</li>
					</ol>
				</ul>
			</slide>
			<part id="css1-selectors">
				<title>CSS1 Selectors</title>
				<slide>
					<title>CSS for Dummies</title>
					<ul>
						<li>Very small set of selectors</li>
						<ul>
							<li>selecting elements by name: <css>h1 { font-size : large }</css></li>
							<li>selecting elements by their <xml>id</xml>: <css>#author { font-weight : bold }</css></li>
							<li>selecting elements by their <xml>class</xml>: <css>.abstract { font-size : small }</css></li>
							<li>combining these mechanisms: <css>p.warning { color : red } </css></li>
						</ul>
						<li>Pseudo-classes and -elements allow interesting effects</li>
						<ul>
							<li><htmel>a</htmel> links have state: <css>a:visited</css> and <css>a:active</css></li>
							<li>selection without markup: <css>p:first-letter</css> and <css>p:first-line</css></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS2 Selectors</title>
				<slide>
					<title>More Selectors</title>
					<ul>
						<li><link href="css1-selectors"/> are available</li>
						<ul>
							<li>element name, <htmla>id</htmla>, <htmla>class</htmla>, and combinations of these</li>
						</ul>
						<li>CSS2 introduced many new selectors</li>
						<ul>
							<li>descendants: <css>ul li { font-style : italic }</css></li>
							<li>children: <css>ul > li { font-style : italic }</css></li>
							<li>adjacent siblings: <css>h1 + h2 { margin-top : 0.5em }</css></li>
							<li>attribute matching: <css>h1[lang=nl] { color : orange }</css></li>
						</ul>
						<li>CSS2 selectors are sufficient for most tasks</li>
						<li>Setting <htmla>class</htmla> attributes is very important</li>
					</ul>
				</slide>
				<slide>
					<title>CSS2 Pseudo Classes</title>
					<ul>
						<li><link href="css1-selectors">CSS1's pseudo-elements</link> are available</li>
						<ul>
							<li>link states and first letter and line of content</li>
						</ul>
						<li>CSS2 adds more qualifications for elements</li>
						<ul>
							<li>first child: <css>p:first-child { text-indent : 0 }</css></li>
							<li>dynamic behavior: <css>a:hover { … } a:active { … } a:focus { … }</css></li>
							<li>language: <css>:lang(de) { quotes: '»' '«' '‹' '›' }</css></li>
							<li><link href="generated-content"/>: <css>q:before { content : open-quote } q:after { content : close-quote }</css></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>CSS Mechanics</title>
			<slide id="css-cascading">
				<title>Cascading</title>
				<ul>
					<li>Stylesheets may have three different origins</li>
					<ol>
						<li><em>page authors</em> associate CSS with their pages</li>
						<li><em>users</em> configure their browser to use some CSS</li>
						<li><em>user agents (browsers)</em> have built-in CSS that defines how to style content</li>
					</ol>
					<li>Conflicts must be resolved using the following algorithm</li>
					<ol>
						<li>find all matching declarations (matching media type and selector)</li>
						<li>sort according to importance (browser &lt; user &lt; author)</li>
						<li>same importance must be sorted by specificity (more specific selectors)</li>
						<li>finally, sort by order in which they were specified</li>
					</ol>
					<li><css>!important</css> rules can influence the algorithm</li>
					<ul>
						<li>they are interpreted in step 2 (sorting by importance)</li>
						<li>browser &lt; user &lt; author &lt; author(important) &lt; user(important)</li>
					</ul>
				</ul>
			</slide>
			<slide id="css-inheritance">
				<title>Inheritance</title>
				<ul>
					<li>Properties often are inherited by children</li>
					<ul>
						<li>setting a table's <css>color</css> sets the <css>color</css> for all contents</li>
						<li>without inheritance, CSS stylesheets would have to be very large</li>
					</ul>
					<li>Inheritance is mostly intuitive</li>
					<ul>
						<li>in reality, it is a bit more complicated</li>
					</ul>
					<ol>
						<li><em>specified value:</em> what the property specified (<link href="css-cascading"/>, inheritance, or initial)</li>
						<li><em>computed value:</em> computed based on the environment (e.g., <css>ex</css> → <css>px</css>)</li>
						<li><em>used value:</em> converted to an absolute value (e.g., percentage widths)</li>
						<li><em>actual value:</em> specific for the environment (e.g., borders with pixel fractions)</li>
					</ol>
				</ul>
			</slide>
			<slide id="css-import">
				<title>Structuring Stylesheets</title>
				<ul>
					<li>Stylesheets may need to be structured</li>
					<ul>
						<li>importing CSS code is supported: <css>@import "/dretnet.css" ;</css></li>
						<li>modules of CSS code can be reused in different contexts</li>
					</ul>
					<li>Stylesheets may be specific for a media type</li>
					<ul>
						<li><em>braille, embossed, handheld, print, projection, screen, speech, tty, tv</em></li>
						<li>specified in HTML: <htmel>link rel="stylesheet" type="text/css" media="print" href="/print.css"</htmel></li>
						<li>specified in CSS: <css>@media print { … }</css></li>
						<li>media-dependent import: <css>@import "/print.css" print ;</css></li>
					</ul>
				</ul>
			</slide>
		</part>
		<slide>
			<title>CSS Conclusions</title>
			<ul>
				<li>Appropriate for HTML</li>
				<ul>
					<li>Flexible selection of elements using <link href="css-selectors"/></li>
					<li>Powerful formatting of elements using <link href="css-properties"/></li>
					<li>Interesting interface design with <em>pseudo-classes</em> and <em>-elements</em></li>
				</ul>
				<li>Inappropriate for general publishing</li>
				<ul>
					<li>documents often need to be restructured</li>
					<li>XML → HTML+CSS is a popular Web publishing setup</li>
				</ul>
			</ul>
		</slide>
    </presentation>
    <presentation id="forms">
        <title>HTML Forms</title>
        <date>2009-09-16</date>
        <toc class="reading"><a href="http://htmlhelp.com/faq/html/forms.html" title="Web Design Group: HTML Forms Web Authoring FAQ">HTML Forms FAQ</a></toc>
        <toc class="resources"><a href="http://www.webstyleguide.com/wsg3/10-forms-and-applications/" title="Web Style Guide: Forms and Applications">Style Guide</a>&#160;· <a href="http://www.w3.org/TR/html401/interact/forms.html" title="HTML Specification: Forms">Forms Spec</a></toc>
        <toc class="abstract">This lecture introduces <em>HTML Forms</em>, a way how an HTML page can provide input fields, so that users can provide data to a Web-based application. HTML forms are regular HTML pages (i.e., using regular HTML structures), but they also contain special HTML elements for data entry. Most importantly, each form contains instructions on how to submit the entered data, and the browser will use that information to compose a request containing all the data of the form submission.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
		<part>
			<title>Forms Basics</title>
			<slide>
				<title>HTTP Web Services</title>
				<ul>
					<li>Services can be provided through URI/HTTP</li>
					<ul>
						<li>URI-based services need input as a <em>query string</em></li>
						<li>the question is how the user gets information into the URI</li>
					</ul>
					<li>HTML forms provide an interface for assembling query strings</li>
					<ul>
						<li>users fill out a form providing several fields</li>
						<li>the browser submits the entered information by HTTP to a URI</li>
						<li>the result of the request is displayed to the user</li>
					</ul>
					<li>HTTP has two different methods for submitting data</li>
					<ul>
						<li><code><link href="form-get">GET</link></code> encodes the data as a URI query string</li>
						<li><code><link href="form-post">POST</link></code> encodes the data as HTTP request entity</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Forms Mechanics</title>
				<ul>
					<li>HTML forms are normal Web pages (using form elements)</li>
					<li>The process receiving the form data produces a result page</li>
				</ul>
				<img style="width : 90% ; margin : 2% ; " src="form-mechanics.png"/>
			</slide>
			<slide>
				<title>Forms Markup</title>
				<ul>
					<li>All form elements must be inside a <htmel>form</htmel> element</li>
					<ul>
						<li>specifies the URI for submitting the form values (<xml>action="URI"</xml>)</li>
						<li>specifies the method for submitting the form values (<xml>method="<link href="form-get">GET</link>|<link href="form-post">POST</link>"</xml>)</li>
					</ul>
					<li><htmel>form</htmel> contains regular HTML markup and form elements</li>
					<ul>
						<li>the regular HTML markup creates the form's layout (table, list, texts)</li>
						<li>the form elements create the controls for acquiring input data</li>
					</ul>
					<li>Each <htmel>form</htmel> should have a <em>submit button</em></li>
					<ul>
						<li>when pressing this button, the form values are sent to the <xml>action</xml> URI</li>
						<li>without such a button, the form values cannot be submitted</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Forms Elements (User View)</title>
				<ul>
					<li>HTML provides a small set of form controls</li>
					<li>Sufficient for many applications</li>
				</ul>
				<hr/>
				<form action="http://stevex.net/dump.php" method="POST" enctype="multipart/form-data">
					<table>
						<tr><td valign="top" align="right">Text:</td><td><input type="text" name="text" value="text input"/></td></tr>
						<tr><td valign="top" align="right">Password:</td><td><input type="password" name="password" value="hidden text"/></td></tr>
						<tr><td valign="top" align="right">Checkbox:</td><td><input type="checkbox" name="check" value="1"/> <input type="checkbox" name="check" value="2"/> <input type="checkbox" name="check" value="3"/></td></tr>
						<tr><td valign="top" align="right">Radio Button:</td><td><input type="radio" name="radio" value="1"/> <input type="radio" name="radio" value="2"/> <input type="radio" name="radio" value="3"/></td></tr>
						<tr><td valign="top" align="right">Text Areas:</td><td><textarea name="textarea" rows="2" cols="20"/></td></tr>
						<tr><td valign="top" align="right">Selection:</td><td><select name="select"><option selected="selected">XML</option><option>SGML</option></select></td></tr>
						<tr><td valign="top" align="right">Multiple Selection:</td><td><select name="mselect" multiple="multiple"><option>242</option><option>290-3</option><option>290-13</option></select></td></tr>
						<tr><td valign="top" align="right">File Upload:</td><td><input name="file" type="file"/></td></tr>
						<tr><td valign="top" align="right">Hidden:</td><td><input type="hidden" name="hidden" value="hidden input"/></td></tr>
						<tr><td valign="top" align="right">Submit:</td><td><input name="submit" type="submit"/></td></tr>
					</table>
				</form>
			</slide>
			<slide>
				<title>Forms Elements (Source View)</title>
	<pre><![CDATA[<form action="http://stevex.net/dump.php" method="POST" enctype="multipart/form-data"><table>
	<tr><td>Text:</td><td><input type="text" name="text" value="text input"/></td></tr>
	<tr><td>Password:</td><td><input type="password" name="password" value="hidden text"/></td></tr>
	<tr><td>Checkbox:</td><td><input type="checkbox" name="check" value="1"/> <input type="checkbox" name="check" value="2"/> <input type="checkbox" name="check" value="3"/></td></tr>
	<tr><td>Radio Button:</td><td><input type="radio" name="radio" value="1"/> <input type="radio" name="radio" value="2"/> <input type="radio" name="radio" value="3"/></td></tr>
	<tr><td>Text Areas:</td><td><textarea name="textarea" rows="2" cols="20"/></td></tr>
	<tr><td>Selection:</td><td><select name="select"><option selected="selected">XML</option><option>SGML</option></select></td></tr>
	<tr><td>Multiple Selection:</td><td><select name="mselect" multiple="multiple"><option>242</option><option>290-3</option><option>290-13</option></select></td></tr>
	<tr><td>File Upload:</td><td><input name="file" type="file"/></td></tr>
	<tr><td>Hidden:</td><td><input type="hidden" name="hidden" value="hidden input"/></td></tr>
	<tr><td>Submit:</td><td><input name="submit" type="submit"/></td></tr>
</table></form>]]></pre>
			</slide>
			<slide id="calendar">
				<title>Date Entry</title>
				<listing src="date-picker.html"/>
			</slide>
			<slide id="form-get">
				<title>Forms and GET</title>
				<ul>
					<li>Limited to string-oriented form values</li>
					<ul>
						<li>but HTML forms also allow file upload (this requires <code>POST</code>)</li>
					</ul>
					<li>All values of all form input fields are collected</li>
					<ul>
						<li>for text and selection fields, this is one input field</li>
						<li>for checkboxes and radio buttons, this collects the selected fields</li>
					</ul>
					<li>The browser composes a URI query string</li>
					<ul>
						<li>the form submission is a set of name/value pairs (names may appear more than once!)</li>
						<li>using URI query string notation, it is appended to the URI of the form's <xml>action</xml></li>
					</ul>
					<li><code>GET</code> is good!</li>
					<ul>
						<li>URI-encoded queries can be bookmarked and otherwise reused (e.g., cached)</li>
						<li>if possible, use <code>GET</code> when implementing a form</li>
					</ul>
				</ul>
			</slide>
			<slide id="form-post">
				<title>Forms and POST</title>
				<ul>
					<li><code>GET</code> encodes the values in the URI</li>
					<ul>
						<li>for file uploads, this is not possible</li>
						<li>HTTP's <code>POST</code> request method can upload data</li>
					</ul>
					<li><code>POST</code> sends a request containing an entity</li>
					<ul>
						<li>the HTTP request then looks similar to a response (header fields and entity)</li>
						<li>the receiving process (the Web server) accepts the POST body</li>
					</ul>
					<li>Entities can use any format (it is specified in a header field)</li>
					<ul>
						<li>just like e-mails, entities can have multiple parts</li>
						<li>the parts are separated using the standard MIME mechanism</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>POST Form Processing</title>
				<ul>
					<li><code>POST</code> is used if the <htmel>form</htmel> specifies it</li>
					<ul>
						<li>it can (but should not) be used for non-file forms</li>
						<li>it should be used for file upload forms (otherwise, only the name is uploaded)</li>
					</ul>
					<li>File upload forms must specify the appropriate encoding</li>
					<ul>
						<li><q><code>application/x-www-form-urlencoded</code></q> is the default (values in the entity)</li>
						<li><q><code>multipart/form-data</code></q> is required for file upload (multipart form data)</li>
					</ul>
					<li>The server side must be prepared to receive <code>POST</code> requests</li>
					<ul>
						<li>it must parse the entity rather than the URI's query string</li>
						<li>form values can then be extracted from the entity</li>
						<li>some environments (e.g., PHP) allow <code>GET</code>/<code>POST</code> to be handled transparently</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Processing of Form Data</title>
				<ul>
					<li>Form data is always encoded</li>
					<ul>
						<li>as a query string when using <code>GET</code></li>
						<li>in an encoded entity when using <code>POST</code></li>
						<li>in a multipart entity when using <code>POST</code> with <code>multipart/form-data</code></li>
					</ul>
					<li>Parsing the form data should be done by existing software</li>
					<ul>
						<li>most Web-aware programming environments provide this functionality</li>
						<li>PHP allows access through different mechanisms</li>
					</ul>
				</ul>
				<listing src="form-variables.php" line="8-11"/>
			</slide>
		</part>
		<part>
			<title>Structuring Forms</title>
			<slide>
				<title>Form Usability</title>
				<ul>
					<li>HTML forms are very loosely structured</li>
					<ul>
						<li><htmel>form</htmel> somewhere representing the container</li>
						<li>inside the <htmel>form</htmel> a random collection of HTML and form inputs</li>
					</ul>
					<li>Visually, the structure often is (and should be) easy to see</li>
					<ul>
						<li>for non-visual access, more structure must be provided</li>
						<li>accessibility has become a major issue on the Web</li>
					</ul>
					<li>Accessibility has many different facets</li>
					<ul>
						<li>voice browsers must be able to read aloud Web forms</li>
						<li>gateways should be able to intelligently re-structure Web forms</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Labels</title>
				<ul>
					<li>Label and form control are not connected by HTML</li>
				</ul>
	<pre><![CDATA[<tr><td>Text:</td><td><input type="text" name="text"/></td></tr>
<tr><td>Password:</td><td><input type="password" name="password"/></td></tr>]]></pre>
				<ul>
					<li>The <htmel>label</htmel> element allows this connection to be made</li>
					<ul>
						<li>it connects a form control with the describing label</li>
						<li>this association is now accessible to clients for processing</li>
					</ul>
				</ul>
	<pre><![CDATA[<tr>
 <td><label for="textctrl">Text:</label></td>
 <td><input type="text" name="text" id="textctrl"/></td>
</tr>
<tr>
 <td><label for="pwdctrl">Password:</label></td>
 <td><input type="password" name="password" id="pwdctrl"/></td>
</tr>]]></pre>
			</slide>
			<slide>
				<title>Fieldsets</title>
				<ul>
					<li>Complex forms may need structuring</li>
					<ul>
						<li>groups of controls for subsets of the collected data</li>
						<li>this structure should be represented in markup</li>
					</ul>
				</ul>
				<pre><![CDATA[<fieldset><legend>Billing</legend>billing form HTML …</fieldset>
<fieldset><legend>Shipping</legend> shipping form HTML … </fieldset>]]></pre>
				<ul>
					<li>Excellent example for HTML's markup design philosophy</li>
					<ul>
						<li>if a client does not support fieldsets, the elements are ignored</li>
						<li>the title of the fieldset will be displayed, because it is text</li>
					</ul>
				</ul>
				<div style="margin : 2%">
					<fieldset><legend>Billing</legend><em>billing form controls …</em></fieldset>
				</div>
				<div style="margin : 2%">
					<fieldset><legend>Shipping</legend><em>shipping form controls …</em></fieldset>
				</div>
			</slide>
			<slide>
				<title>Tabbing in Forms</title>
				<ul>
					<li>Tabbing is a very convenient way of navigating a form</li>
					<ul>
						<li>after completing one field, users should be taken to the next</li>
						<li>the order should be defined by the form creator, not by accident</li>
					</ul>
					<li>The <html>tabindex</html> attribute defines the tabbing order</li>
					<ul>
						<li>it contains a number which is interpreted relative to other numbers</li>
						<li>all form controls may carry a <html>tabindex</html> attribute</li>
					</ul>
					<li><html>tabindex</html> 1-9:
						<select tabindex="1"><option>1</option></select>
						<select tabindex="7"><option>7</option></select>
						<select tabindex="3"><option>3</option></select>
						<select tabindex="6"><option>6</option></select>
						<select tabindex="8"><option>8</option></select>
						<select tabindex="2"><option>2</option></select>
						<select tabindex="4"><option>4</option></select>
						<select tabindex="5"><option>5</option></select>
						<select tabindex="9"><option>9</option></select>
					</li>
				</ul>
			</slide>
			<slide>
				<title>Disabled and Readonly Controls</title>
				<ul>
					<li>In complex scenarios, certain controls may be disabled or readonly</li>
					<ul>
						<li>based on a workflow, some controls may not apply in all cases</li>
						<li>for the sake of a consistent view, they should still be included in the interface</li>
					</ul>
					<li>Disabled controls are not used</li>
					<ul>
						<li>they cannot be tabbed to and never receive focus</li>
						<li>their value is not included in the form's submission data</li>
					</ul>
					<li>Readonly controls cannot be changed</li>
					<ul>
						<li>they can be tabbed to and may receive focus</li>
						<li>their <em>value</em> may not be changed (important for radio buttons and checkboxes!)</li>
						<li>their value is included in the form's submission data</li>
					</ul>
					<li>Important: Never trust the Browser!</li>
				</ul>
			</slide>
			<slide>
				<title>Disabled and Readonly Controls Display</title>
					<table style="margin : 2% ; width : 90% ; " rules="groups" cellpadding="5">
						<thead>
							<tr>
								<td/>
								<th>Normal Control</th>
								<th>Disabled Control</th>
								<th>Readonly Control</th>
							</tr>
						</thead>
						<tbody>
							<tr><td valign="top" align="right">Text:</td><td><input type="text" name="text1" value="text input"/></td><td><input disabled="disabled" type="text" name="text2" value="text input"/></td><td><input readonly="readonly" type="text" name="text3" value="text input"/></td></tr>
							<tr><td valign="top" align="right">Password:</td><td><input type="password" name="password1" value="hidden text"/></td><td><input disabled="disabled" type="password" name="password2" value="hidden text"/></td><td><input readonly="readonly" type="password" name="password3" value="hidden text"/></td></tr>
							<tr><td valign="top" align="right">Checkbox:</td><td><input type="checkbox" name="check1" value="1"/> <input type="checkbox" name="check1" checked="checked" value="2"/> <input type="checkbox" name="check1" value="3"/></td><td><input disabled="disabled" type="checkbox" name="check2" value="1"/> <input disabled="disabled" type="checkbox" name="check2" checked="checked" value="2"/> <input disabled="disabled" type="checkbox" name="check2" value="3"/></td><td><input readonly="readonly" type="checkbox" name="check3" value="1"/> <input readonly="readonly" type="checkbox" name="check3" checked="checked" value="2"/> <input readonly="readonly" type="checkbox" name="check3" value="3"/> !</td></tr>
							<tr><td valign="top" align="right">Radio Button:</td><td><input type="radio" name="radio1" value="1"/> <input type="radio" name="radio1" checked="checked" value="2"/> <input type="radio" name="radio1" value="3"/></td><td><input disabled="disabled" type="radio" name="radio2" value="1"/> <input disabled="disabled" type="radio" name="radio2" checked="checked" value="2"/> <input disabled="disabled" type="radio" name="radio2" value="3"/></td><td><input readonly="readonly" type="radio" name="radio3" value="1"/> <input readonly="readonly" type="radio" name="radio3" checked="checked" value="2"/> <input readonly="readonly" type="radio" name="radio3" value="3"/> !</td></tr>
							<tr><td valign="top" align="right">File Upload:</td><td><input name="file1" type="file"/></td><td><input disabled="disabled" name="file2" type="file"/></td><td><input readonly="readonly" name="file3" type="file"/> !</td></tr>
							<tr><td valign="top" align="right">Text Areas:</td><td><textarea name="textarea1" rows="2" cols="20">initial text</textarea></td><td><textarea disabled="disabled" name="textarea2" rows="2" cols="20">initial text</textarea></td><td><textarea readonly="readonly" name="textarea3" rows="2" cols="20">initial text</textarea></td></tr>
							<tr><td valign="top" align="right">Selection:</td><td><select name="select1"><option selected="selected">XML</option><option>SGML</option></select></td><td><select disabled="disabled" name="select2"><option selected="selected">XML</option><option>SGML</option></select></td><td rowspan="2" valign="middle">[ not supported ]</td></tr>
							<tr><td valign="top" align="right">Multiple Selection:</td><td><select name="mselect1" multiple="multiple"><option>242</option><option>290-3</option><option>290-13</option></select></td><td><select disabled="disabled" name="mselect2" multiple="multiple"><option>242</option><option>290-3</option><option>290-13</option></select></td></tr>
						</tbody>
					</table>
			</slide>
			<slide>
				<title>Markup for Disabled/Readonly Controls</title>
				<listing src="forms.html" line="9-19" title="Examples for Disabled and Readonly Controls"/>
			</slide>
		</part>
		<slide>
			<title>Conclusions</title>
			<ul>
				<li>HTML forms allow data entry on Web pages</li>
				<li>Only a small number of form controls are available</li>
				<li>Form submission is just another HTTP request</li>
				<li>Forms should be structured to be more accessible</li>
			</ul>
		</slide>
	</presentation>
    <presentation id="microformats">
        <title>Microformats</title>
        <date>2009-09-22</date>
		<toc class="reading"><a href="http://en.wikipedia.org/wiki/Microformat" title="Wikipedia: Microformat">Wikipedia</a></toc>
        <toc class="resources"><a href="http://microformats.org/" title="microformats.org">Microformats</a>&#160;· <a href="http://www.xfront.com/microformats/" title="Tutorials on Microformats">Tutorials</a></toc>
        <toc class="abstract">HTML pages are for human users and describe a resource in structural terms (headings, lists, tables, …). For machine-based interaction, it is often necessary to have more information about the application concepts. XML is a popular language for representing application structures, but is targeted at machine-based processing alone. <em>Microformats</em> and more formal approaches such as the <em>Resource Description Framework (RDF)</em>, <em>RDF in Attributes (RDFa)</em>, and <em>Web Ontology Language (OWL)</em> often are used to describe Web content semantically.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>HTML vs. XML</title>
			<ul>
				<li>HTML describes structures in a very general way</li>
				<ul>
					<li>HTML elements describe logical page structures such as headings, lists, tables, …</li>
					<li>useful for dynamic and adaptive page rendering, but not for understanding contents</li>
				</ul>
				<li>Good HTML may have more information available</li>
				<ul>
					<li>classes in HTML elements may represent underlying concepts (CSS may use this)</li>
					<li><link href="html-containers">HTML containers</link> may represent aggregation of some basic information items</li>
				</ul>
				<li>Very good HTML</li>
				<ul>
					<li>some guidelines/rules/methods for <em>understanding</em> class names</li>
					<li>some model for the underlying schema (what may appear in which combination)</li>
				</ul>
				<li>Excellent HTML is dynamically generated from XML</li>
				<ul>
					<li>the model is exposed as structured XML data that is available to the client</li>
					<li>there is a stylesheet for producing the HTML version of the XML</li>
					<li>but even XML does not provide semantics (it is just a structured syntax)</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Plain HTML</title>
			<listing src="systemsix-plain.html"/>
		</slide>
		<slide>
			<title>Good HTML</title>
			<listing src="systemsix-good.html"/>
		</slide>
		<slide>
			<title>Excellent HTML</title>
			<listing src="systemsix.xml"/>
		</slide>
		<slide>
			<title>XML → HTML Stylesheet</title>
			<listing src="bike2html.xsl"/>
		</slide>
		<slide>
			<title>Graceful Degradation</title>
			<ul>
				<li>XML was designed as a language for Web content</li>
				<ul>
					<li>the idea was that XML documents would be delivered to the browser</li>
					<li>stylesheets (CSS/XSL) would take care of the client-side rendering</li>
				</ul>
				<li><link href="css">CSS</link> is good at supporting graceful degradation</li>
				<ul>
					<li>viewing an HTML page with CSS turned off most of the time works fine</li>
				</ul>
				<li>XSLT is not good at supporting graceful degradation</li>
				<ul>
					<li>the browser just displays the raw XML when XSLT is not supported</li>
				</ul>
				<li>Serving XML on the Web is not a good idea</li>
				<ul>
					<li>in closed scenarios (intranet applications) this is a viable solution</li>
					<li>in open scenarios, HTML should be served as the default representation</li>
					<li>alternate versions can be provided by supporting <link href="http-conneg"/></li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Excellent HTML</title>
			<listing src="systemsix-excellent.html"/>
		</slide>
		<slide>
			<title>From Information, Knowledge</title>
			<ul>
				<li>XML is often said to be <q>self-describing</q></li>
				<ul>
					<li>many people think this is the same as <q>self-explanatory</q></li>
					<li>the catch is what exactly it is you refer to by <q>describing</q></li>
				</ul>
				<li>Database data cannot live without a database</li>
				<ul>
					<li>database data is simply content, the structure is provided by a DBMS</li>
					<li>XML documents have their structure encoded within them</li>
					<li>compared to database data, XML in fact is <q>self-describing</q></li>
				</ul>
				<li>What is the gap between <q>self-describing</q> and <q>self-explanatory</q>?</li>
				<ul>
					<li>it is impossible to find out how the document could be modified</li>
					<li>there are no semantics associated with structure or content</li>
					<li>so <q>self-describing</q> means you can guess a lot, but you may be wrong</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>The Semantic Web Hype</title>
			<blockquote>1965, H. A. Simon: <q href="http://en.wikipedia.org/wiki/Artificial_intelligence#_note-11">machines will be capable, within twenty years, of doing any work a man can do</q><br />1967, Marvin Minsky: <q href="http://en.wikipedia.org/wiki/Artificial_intelligence#_note-12">Within a generation [ … ] the problem of creating <q>artificial intelligence</q> will substantially be solved.</q></blockquote>
			<ul>
				<li>How to get past the limitations of HTML?</li>
				<ul>
					<li>a machine-friendly Web must make Web resources machine-processable</li>
					<li>XML solved the problem on the syntax level</li>
					<li>how could the problem be solved on the level of semantics?</li>
				</ul>
				<li>As in the 1970s, <em>description logic</em> was declared as being the solution</li>
				<ul>
					<li>there was a need for the Web to move towards semantics</li>
					<li>there was a community of AI researchers with a long history</li>
					<li>the <em>Semantic Web</em> was born and is currently repeating AI history</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Semantic Web Layer Cake</title>
			<img style="width : 90% ; margin : 2% ; " src="semantic-web-layers.png" href="http://www.w3.org/2001/12/semweb-fin/w3csw"/>
		</slide>
		<part>
			<title>Microformats</title>
			<slide>
				<title>Islands of Semantics</title>
				<ul>
					<li>Microformats solve very specific problems in a very specific way</li>
					<ul>
						<li>encoding address information on a Web page</li>
						<li>encoding a location of something represented by a Web resource</li>
					</ul>
					<li>Microformats can be compared to <q>tagging</q></li>
					<ul>
						<li>a very simple mechanism with a minimal barrier-to-entry</li>
						<li>little flexibility in adapting the mechanism to slightly other uses</li>
						<li>often underspecified and interpretation implementation-dependent</li>
						<li>no unified rules across different platforms which makes processing hard</li>
						<li>nice and easy to start with, but questionable for robust long-term solutions</li>
					</ul>
					<li>Currently there are about 10 reasonably popular microformats</li>
					<ul>
						<li><a href="http://microformats.org/wiki/Main_Page">calendar entries, addresses, licenses, outlines, geolocation, resumes, social networking, …</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Microformat Syntax</title>
				<ul>
					<li>HTML has some underspecified and underused elements</li>
					<ul>
						<li><htmel>dfn</htmel>, <htmel>code</htmel>, <htmel>samp</htmel>, <htmel>kbd</htmel>, <htmel>var</htmel>, <htmel>cite</htmel>, <htmel>abbr</htmel>, <htmel>acronym</htmel></li>
						<li>they can be reused and augmented with additional information</li>
					</ul>
					<li>HTML allows non-HTML content in HTML pages</li>
					<ul>
						<li>unknown elements and attributes must be ignored</li>
					</ul>
					<li>HTML allows <html>class</html> attributes to carry semantics</li>
					<li>HTML has a <htmel>head</htmel> which contains page metadata</li>
					<ul>
						<li>for example, the <htmel>link</htmel> element specifies connections to other resources</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Magic Names</title>
				<ul>
					<li>A syntax defines where and how to embed information</li>
					<ul>
						<li>what is embedded and how well is it defined semantically?</li>
						<li>is there an underlying model for specifying dependencies?</li>
						<li>how many assumptions does it take to implement a microformat?</li>
					</ul>
					<li>Names are never self-explanatory, they always represent concepts</li>
					<ul>
						<li>nothing can remove the burden of defining a conceptual model</li>
						<li>if this is not done, models evolve and there will be more than one</li>
					</ul>
					<li><q>Microformats</q> and <q>tagging</q> share the same folklore</li>
					<ul>
						<li>define simple things and good things will happen</li>
						<li>this works by supporting a quickly growing ecosystem of diverging semantics</li>
						<li>semantics are most useful when they are well-defined</li>
						<li>loose semantics also have some utility</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Microformats on the Web</title>
				<ul>
					<li>Easy to embed for generated content</li>
					<ul>
						<li>some of the very basic formats may even appear in browsers one day</li>
						<li>combining well-designed URIs with document relationships is better than any site map</li>
					</ul>
					<li>Hard to rely on for applications that need dependable semantics</li>
					<ul>
						<li>useful as a hint and as a starting point</li>
						<li>microformats are not a good idea for complex information management tasks</li>
					</ul>
					<li>Use as foundation for representing common concepts</li>
					<ul>
						<li>when formatting addresses, use <html href="http://microformats.org/wiki/adr">adr</html> class names</li>
						<li>for structured documents use <html href="http://microformats.org/wiki/xoxo">XOXO</html></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="rdf">
			<title short="RDF">Resource Description Framework (RDF)</title>
			<slide>
				<title>Describing Resources</title>
				<ul>
					<li>RDF describes everything in <em>triples</em></li>
					<ul>
						<li>making a statement about a <em>resource</em> (identified by a <link href="uri">URI</link>)</li>
						<li>describing a certain <em>property</em> of the resource (identified by a <link href="uri">URI</link>)</li>
						<li>specifying a <em>value</em> for that property (a <link href="uri">URI</link> or a <q>literal</q>)</li>
					</ul>
				</ul>
				<pre href="http://www.w3.org/TR/REC-rdf-syntax/#intro"><![CDATA[<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:contact="http://www.w3.org/2000/10/swap/pim/contact#">
  <contact:Person rdf:about="http://www.w3.org/People/EM/contact#me">
    <contact:fullName>Eric Miller</contact:fullName>
    <contact:mailbox rdf:resource="mailto:em@w3.org"/>
    <contact:personalTitle>Dr.</contact:personalTitle> 
  </contact:Person>
</rdf:RDF>]]></pre>
			</slide>
			<slide>
				<title>RDF Graphs</title>
				<img src="rdf-graph.png" style="height : 75% ; margin : 2% ; " href="http://www.w3.org/TR/REC-rdf-syntax/#intro"/>
			</slide>
			<slide>
				<title>RDF is Simple and Complex</title>
				<ul>
					<li><a href="http://www.w3.org/TR/rdf-concepts/">RDF's abstract model</a> is the idea of descriptive triples</li>
					<ul>
						<li>the actual RDF model is rooted in <em>description logic</em></li>
						<li>RDF itself can only describe individuals (something identified by URI)</li>
					</ul>
					<li><a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDF/XML</a> is an XML syntax for encoding triples</li>
					<ul>
						<li>the syntax allows a variety of ways to represent the same RDF statements</li>
						<li>processing RDF/XML with XML tools is likely to fail</li>
						<li>use RDF parsers to parse all variations of RDF/XML into an abstract RDF graph</li>
					</ul>
					<li><a href="http://www.w3.org/TR/rdf-schema/">RDF Schema</a> supports the creation of <em>RDF vocabularies</em></li>
					<ul>
						<li>describe the <em>classes of things</em> that can be used in statements</li>
						<li>describe the <em>properties</em> which can be used for each of these classes</li>
						<li>describe the <em>allowed</em> values for the supported properties</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>RDF Schema Graph</title>
				<img src="rdfs-graph.png" style="height : 75% ; margin : 2% ; " href="http://www.w3.org/TR/REC-rdf-syntax/#schemaclasses"/>
			</slide>
			<slide id="rdfa">
				<title short="RDFa">RDF in Attributes (RDFa)</title>
				<ul>
					<li>Microformats can use any kind of markup design</li>
					<ul>
						<li>this makes it hard to detect microformats when processing a Web page</li>
						<li>combining microformats can become complicated and ill-designed</li>
					</ul>
					<li>RDFa defines a syntax for embedding RDF into HTML</li>
					<ul>
						<li>the vocabulary must be described by some RDF schema language</li>
					</ul>
				</ul>
				<pre><![CDATA[<p>This document is licensed under a <a xmlns:cc="http://creativecommons.org/licenses/" rel="cc:license" href="http://creativecommons.org/licenses/by/nc-nd/3.0/">Creative Commons License</a>.</p>]]></pre>
				<ul>
					<li>RDFa uses and extends HTML for embedding RDF</li>
					<ul>
						<li>it uses HTML's <html>rel</html>, <html>rev</html>, <html>href</html>, and <html>src</html> attributes</li>
						<li>it defines a number of <a href="http://www.w3.org/TR/rdfa-syntax/#s_metaAttributes">new attributes for HTML elements</a></li>
						<li>it defines a <a href="http://www.w3.org/TR/rdfa-syntax/#s_model">processing model</a> for deriving RDF triples from these attributes</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="semweb-advanced">
			<title>More Languages</title>
			<slide id="sparql">
				<title>SPARQL</title>
				<ul>
					<li>RDF graphs can be large and hard to handle</li>
					<ul>
						<li>querying RDF graphs using XML technologies is hard and slow</li>
						<li>special data structures need special query languages</li>
						<li>SPARQL is a query language for querying RDF graphs</li>
					</ul>
					<li>Using RDF without using SPARQL does not make a lot of sense</li>
					<ul>
						<li>if the data is simple and restricted, why use RDF?</li>
						<li>processing unrestricted RDF without a special language is very hard</li>
					</ul>
					<li>Semantic Web search engines can harvest the Web for RDF</li>
					<ul>
						<li>the result is a huge graph of RDF describing all semantic Web resources</li>
						<li>querying into this graph retrieves all formalized semantics on the Web</li>
					</ul>
				</ul>
			</slide>
			<slide id="owl">
				<title short="OWL">Web Ontology Language (OWL)</title>
				<ul>
					<li>RDF and RDF Schema are rather basic languages</li>
					<li>OWL adds more sophisticated features to RDF Schema</li>
					<ul>
						<li>constructions of classes using existing ones</li>
						<li>characterize relationships (e.g., whether they are transitive, symmetric, functional, etc.)</li>
					</ul>
					<li>Formal semantics are hard to write and compute</li>
					<ul>
						<li>no property expressions or datatypes in RDF Schemas</li>
						<li>not all set operators, restricted cardinality in <em>OWL Lite</em></li>
						<li>some restrictions, but a computational guarantee in <em>OWL DL</em></li>
						<li>full expressive power in <em>OWL Full</em> (but no computational guarantee)</li>
					</ul>
				</ul>
			</slide>
			<slide id="vocabularies">
				<title>Vocabulary Taxonomy</title>
				<img src="vocabulary-taxonomy.png" style="width : 90% ; margin : 2% ; " title="Controlled Vocabularies, Taxonomies, Thesauri, Ontologies"/>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Some Questions</title>
				<ul>
					<li>Is the world something that can be objectively formalized and described?</li>
					<li>If the conceptualization of the world changes, what about the ontology?</li>
					<li>How can ontology users understand a large ontology?</li>
					<li>Should users trust ontologies which are based on strict categorization?</li>
					<li>How much responsibility should we delegate to formalisms?</li>
					<li>Can <a href="http://video.google.com/videoplay?docid=-7704388615049492068">computer formalizations fully capture semantics</a>?</li>
				</ul>
			</slide>
			<slide>
				<title>Semantics are Important and Hard</title>
				<ul>
					<li>Semantics must be captured somewhere</li>
					<li>Most semantic definitions use prose and some formalism</li>
					<li>Completely formal semantics are hard to define and hard to use</li>
					<li>Semantic Web technologies may share the fate of AI</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="cms">
        <title short="CMS">Content Management System (CMS)</title>
        <date>2009-09-24</date>
		<toc class="reading"><a href="http://en.wikipedia.org/wiki/Content_management_system" title="Wikipedia: Content Management System">Wikipedia&#160;(CMS)</a>&#160;· <a href="http://en.wikipedia.org/wiki/Web_content_management_system" title="Wikipedia: Web Content Management System">Wikipedia&#160;(WCMS)</a></toc>
        <toc class="resources"><a href="http://httpd.apache.org/docs/2.2/" title="Apache HTTP Server Documentation">Apache</a>&#160;· <a href="http://drupal.org/handbooks" title="Drupal Documentation">Drupal</a>&#160;· <a href="http://www.marklogic.com/product/marklogic-server.html" title="MarkLogic Overview">MarkLogic</a></toc>
        <toc class="assignment"><a href="a/2/">A2</a>&#160;assigned (due&#160;date:&#160;10/4)</toc>
        <toc class="abstract">The fundamental architecture of the Web only requires a Web server capable of answering HTTP requests on the server side. The question, however, is what that content server is serving when responding to requests. The content served by Web servers may come from files, from some form of managed more or less static content, or from dynamic processes. In this lecture, the idea of a <em>Content Management System (CMS)</em> or, more specifically, a <em>Web Content Management System (WCMS)</em>, is introduced in a structured and disciplined way.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Content on the Web</title>
			<ul>
				<li>Web technologies describe how to get content into browsers</li>
				<ul>
					<li><link href="html">HTML</link> is the universally supported representation for content</li>
					<li><link href="http">HTTP</link> allows browsers to <http>GET</http> information from servers</li>
				</ul>
				<li>Resources are never transmitted or displayed</li>
				<ul>
					<li>browsers only display <em>resource representations</em></li>
					<li>how a representation is produced is entirely up to the server</li>
				</ul>
				<li>Managing resources and producing representations is a core Web task</li>
				<ul>
					<li>resource management often is done using some proprietary system</li>
					<li>mapping resources to representations should be done by rules</li>
					<li>in many scenarios, resources are <em>content units</em></li>
				</ul>
				<li>A <em>Content Management System (CMS)</em> manages any kind of content</li>
				<ul>
					<li>and it does not necessarily provide Web access</li>
				</ul>
				<li>A <em>Web Content Management System (WCMS)</em> has Web-specific functionality</li>
				<ul>
					<li>support for Web representations (HTML &amp; CSS)</li>
					<li>support for Web patterns (navigation bars)</li>
				</ul>
			</ul>
		</slide>
		<part id="cms-web">
			<title>Content on the Web</title>
			<slide>
				<title>Content and Structure</title>
				<ul>
					<li>Content is what matters most</li>
					<ul>
						<li>content itself often has some internal structure</li>
						<li>content may explicitly link to other content</li>
					</ul>
					<li>Macro-structure often is more representation than content</li>
					<ul>
						<li>displaying the current context of the content</li>
						<li>displaying related content</li>
						<li>displaying some overall structure (site navigation)</li>
					</ul>
					<li>Content often is reusable across application scenarios</li>
					<ul>
						<li>publishers have used <em>Content Management Systems</em> for a long time</li>
						<li>adding a new publication channel to a CMS should be evolutionary</li>
					</ul>
					<li>Content may require very different support depending on the channel</li>
					<ul>
						<li>newspapers need fine-tuned layout and good control over content size</li>
						<li>Web needs good interlinking and navigation</li>
					</ul>
				</ul>
			</slide>
			<slide id="cms-evolution">
				<title>CMS Evolution</title>
				<ol>
					<li>Web servers reading from files</li>
					<li>Web servers implementing primitive content management (SSI)</li>
					<li>Scripting languages implementing better management</li>
					<li>Management code getting hooked up to databases</li>
					<li>Better handling of client-specific behavior</li>
					<li>Databases getting more diverse (RDB, XML, RDF)</li>
				</ol>
			</slide>
		</part>
		<part id="cms-content">
			<title><q>Content</q> in CMS</title>
			<slide>
				<title>Serving Content from Files</title>
				<img style="width : 90% ; margin : 2% ; " src="wcms-fs-only.png"/>
			</slide>
			<slide>
				<title>The Rise of the CMS</title>
				<ul>
					<li>File-based content management works well for small sites</li>
					<ul>
						<li>simple site structure and small number of files</li>
						<li>redundant parts can be manually synchronized</li>
						<li>no software is required other than a Web server</li>
					</ul>
					<li>Web servers soon developed rudimentary CMS functions (<a href="http://httpd.apache.org/docs/2.2/howto/ssi.html" title="Server Side Includes">SSI</a>)</li>
					<ul>
						<li>rudimentary support is better than no support</li>
						<li>managing a non-trivial setup with SSI still is a challenge</li>
						<li>SSI allows includes but no backlinks and thus hides dependencies</li>
					</ul>
					<li>Content management is very similar to code management</li>
					<ul>
						<li>simple setups require no or little tool support</li>
						<li>serious projects need tools to manage dependencies and changes</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Serving Content from Files with SSI</title>
				<img style="width : 90% ; margin : 2% ; " src="wcms-fs-ssi.png"/>
			</slide>
			<slide id="content-files">
				<title>Files (Opaque Chunks)</title>
				<ul>
					<li>All major operating systems have file systems</li>
					<li>Files are typically treated as opaque chunks of data</li>
					<li>Applications may have special knowledge of file contents</li>
					<li>Advantages of files:</li>
					<ul>
						<li>universally supported across major operating systems</li>
						<li>storage and user management come for free</li>
						<li>all that is needed for a Web site is a Web server</li>
					</ul>
					<li>Disadvantages of files:</li>
					<ul>
						<li>content access requires file system access</li>
						<li>setting up parallel servers requires additional effort</li>
						<li>no support for managing structure, everything handcoded</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>File Systems are Databases</title>
				<ul>
					<li>A file system is a simple hierarchical database</li>
					<ul>
						<li>it does not know data types and simply stores any content</li>
						<li>its structure is a tree with a few extra tricks (such as symlinks)</li>
					</ul>
					<li>Many scenarios have much more structured data models</li>
					<ul>
						<li>products, people, financial institutions all have complex data models</li>
						<li>content should be stored and queried based on these models</li>
					</ul>
					<li>Databases are better optimized for storing structured content</li>
					<ul>
						<li>better methods for structured storage and retrieval</li>
						<li>better strategies for managing large datasets</li>
						<li>sophisticated tools for access control, backup, and versioning</li>
					</ul>
				</ul>
			</slide>
			<slide id="content-tables">
				<title>Tables (Relational Model)</title>
					<ul>
						<li>Most widely used model for large collections of structured data</li>
						<li>Very mature products and many skilled people available</li>
						<li>The biggest advantage is that it is not hierarchical (no structure bias)</li>
						<li>Advantages of relations:</li>
						<ul>
							<li>well-understood model and maps well to existing data</li>
							<li>the non-hierarchical model allows views from different perspectives</li>
							<li>highly scalable solutions available</li>
						</ul>
						<li>Disadvantages of relations:</li>
						<ul>
							<li>bad for sequences and variable structures (choices, repetitions, …)</li>
							<li>very bad for structured documents</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>ER Model</title>
				<img style="height : 70% ; margin : 2% ; " src="ER-Diagram.png" href="http://en.wikipedia.org/wiki/Entity-relationship_model" title="Wikipedia: Entity-Relationship Model"/>
			</slide>
			<slide id="content-xml">
				<title>Ordered Trees (XML)</title>
					<ul>
						<li><link href="xml">XML</link> has a heritage of document processing</li>
						<li>XML tools can be used standalone and are widely supported</li>
						<li>XML and HTML have a very similar foundation</li>
						<li>XML has two built-in directions: hierarchy and ordered children</li>
						<li>Advantages of XML:</li>
						<ul>
							<li>maps well to HTML and XHTML</li>
							<li>well-suited for document-oriented content</li>
						</ul>
						<li>Disadvantages of XML:</li>
						<ul>
							<li>not good at representing non-tree data</li>
							<li>databases not as mature as relational products</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>XML Content</title>
				<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smiley.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>
				<img style="width : 90% ; margin : 4% ;" src="mixed-content.png" title="XML tree for mixed content"/>
			</slide>
			<slide id="content-rdf">
				<title>Directed Graphs (RDF)</title>
					<ul>
						<li>RDF is the metamodel of the <link href="semweb">Semantic Web</link></li>
						<li>Highly granular, less rigid than tables, less ordered than trees</li>
						<li>Advantages of RDF:</li>
						<ul>
							<li>any structure can be mapped to RDF triples</li>
							<li>support still limited but getting better</li>
						</ul>
						<li>Disadvantages of RDF:</li>
						<ul>
							<li>no model for document boundaries and self-contained units</li>
							<li>bad for sequences</li>
							<li>very bad for structured documents</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>Choose a Matching Metamodel</title>
				<ul>
					<li>Content has some <q>inherent</q> metamodel properties</li>
					<ul>
						<li>forcing that into a different metamodel is possible but unwise</li>
					</ul>
					<li>Using a metamodel which best matches a model is crucial</li>
					<ul>
						<li>if you have large collections of rigid and highly-structured data: Tables</li>
						<li>if you have structured documents with rich text: XML</li>
						<li>if you have fine-granular graph-structured data: RDF</li>
					</ul>
					<li>Mapping is always possible but has severe limitations</li>
					<ul>
						<li>things that work effortlessly in one metamodel may be awkward in another</li>
						<li>there is no such thing as <q>the one metamodel for all needs</q></li>
						<li>RDF's claim to be the one metamodel for everything is not backed by facts</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="cms-management">
			<title><q>Management</q> in CMS</title>
			<slide>
				<title>Deconstructing Management</title>
				<ul>
					<li>What is managed?</li>
					<li>Who is managing it?</li>
					<li>What are the management support functions?</li>
					<li>Are there workflows and processes?</li>
					<li>Is the management integrated with other processes?</li>
					<li>Is it likely that processes will be followed?</li>
				</ul>
			</slide>
			<slide>
				<title>Managing Content with Files</title>
				<img style="width : 90% ; margin : 2% ; " src="wcms-fs-vcs.png"/>
			</slide>
			<slide>
				<title>Integrated Management Functions</title>
				<ul>
					<li>Separating management and publishing does not work well</li>
					<ul>
						<li>typical examples for workflows are review and release processes</li>
						<li>oftentimes publishing-specific roles are required</li>
					</ul>
					<li>Integrated management takes over all tasks</li>
					<ul>
						<li>What is managed? Database of structured content.</li>
						<li>Who is managing it? Registered users based on roles.</li>
						<li>What are the management support functions? Building a site around the content.</li>
						<li>Are there workflows and processes? Can be based on users/roles/content.</li>
						<li>Is the management integrated with other processes? APIs allow extension/integration.</li>
						<li>Is it likely that processes will be followed? Easier to use than homegrown methods.</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="cms-system">
			<title><q>System</q> in CMS</title>
			<slide>
				<title>System Platform</title>
				<ul>
					<li>Systems need runtime environments</li>
					<li>CMS are programs that are installed on some OS</li>
					<ul>
						<li>integrated Web server or connect with Web server</li>
						<li>integrated database or connect with database</li>
						<li>integrated user management or connect with user management</li>
					</ul>
					<li>Typical steps for setting up a CMS</li>
					<ol>
						<li>setting up the runtime environment</li>
						<li>installing the CMS software</li>
						<li>initializing the CMS installation</li>
						<li>migrating existing data into the CMS installation</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Drupal</title>
				<img style="width : 90% ; margin : 2% ; " src="wcms-drupal.png"/>
			</slide>
			<slide>
				<title>MarkLogic</title>
				<ul>
					<li>XML-based content management</li>
					<ul>
						<li>content is stored in XML documents in an XML database</li>
						<li>non-XML content can be stored as well</li>
						<li>programming is done in XQuery using extension functions</li>
					</ul>
					<li>XML-based content management works well for documents</li>
					<ul>
						<li>XML's ordered trees are well-suited to represent documents</li>
						<li><a href="http://markmail.org/">MarkMail</a> provides access to <a href="http://markmail.org/search/?q=erik%20wilde">well-indexed data</a> of 7,227 mailing lists</li>
					</ul>
					<li>XML databases are more efficient than XML files</li>
					<ul>
						<li>XML content is indexed and access is faster</li>
						<li>building server farms is supported by the database management system</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Content vs. Web Pages</title>
				<ul>
					<li>Managing content is one of the prerequisites for Web publishing</li>
					<li>Content management can be done generically or Web-specific</li>
					<li>WCMS provides specific support for Web publishing tasks</li>
					<li>The most important part of a CMS is its metamodel</li>
				</ul>
			</slide>
        </part>
    </presentation>
	<presentation id="xml">
		<title short="XML">Extensible Markup Language (XML)</title>
		<date>2009-09-29</date>
		<toc class="reading"><a href="http://www.w3.org/Press/1998/XML10-REC">XML 1.0 Press Release</a>&#160;· <a href="http://dret.net/netdret/docs/wilde-cacm2008-xml-fever.html" title='Erik Wilde and Robert J. Glushko, "XML Fever", Communications of the ACM, 51(7):40-46, July 2008'>XML&#160;Fever</a>&#160;· <a href="http://www.tbray.org/ongoing/When/200x/2006/01/09/On-XML-Language-Design">On XML Language Design</a></toc>
		<toc class="resources"><a href="http://www.w3.org/TR/REC-xml/" title="W3C XML 1.0 Specification">Spec</a>&#160;· <a href="http://dret.net/netdret/docs/wilde-elpub2006-xml.pdf">Structuring Content with XML</a>&#160;· <a href="http://www.tbray.org/ongoing/When/200x/2008/02/10/XML-People" title="XML People">People</a></toc>
		<toc class="abstract">The <em>Extensible Markup Language (XML)</em> defines a simple way for structuring data. The power and popularity of XML can be explained by its versatility, its platform independence, the standards and technologies leveraging it, and the number of tools and products supporting it. Understanding XML itself is rather simple; it depends only on a very small set of other technologies. Unicode and URIs are the most important foundations of XML. XML itself specifies two different things: on the one hand the format for structured data, which are called <em>XML documents</em>, and on the other hand a constraint language for XML documents, which is called <em>Document Type Definition (DTD)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Foundations for XML</title>
			<slide>
				<title>Identifications</title>
				<ul>
					<li>Identification of Character Encodings</li>
					<ul>
						<li>text can be encoded using different character sets and encodings</li>
						<li>IANA maintains the <a href="http://www.iana.org/assignments/character-sets">official list of character encodings</a></li>
						<li>character encoding is about <em>characters</em>, not about <em>text</em></li>
					</ul>
					<li>Identification of Languages</li>
					<ul>
						<li>textual content should be tagged with language information</li>
						<li>specification based on <a href="http://www.loc.gov/standards/iso639-2/langhome.html">ISO 639 language tags</a></li>
						<li>language identification is about <em>text</em>, not about <em>characters</em></li>
					</ul>
				</ul>
			</slide>
			<part id="unicode">
				<title>Unicode</title>
				<slide>
					<title>XML's Idea of Content and Names</title>
					<p>XML documents can use a wide array of characters. They are defined by <a href="http://www.unicode.org/">Unicode</a>, which currently (Version 5.0) defines more than 100,000 characters (#100,000 added in 2005).</p>
					<listing src="japanese1.xml"/>
					<listing src="japanese2.xml"/>
				</slide>
				<slide>
					<title>XML and Unicode</title>
					<ul>
						<li>XML is based on Unicode</li>
						<ul>
							<li>XML is defined in terms of <a href="http://www.w3.org/TR/xml/#sec-starttags">character structures</a></li>
							<li>how these characters are encoded is not part of XML</li>
						</ul>
						<li>How are XML documents encoded?</li>
						<ul>
							<li>applications can use any character encoding they like</li>
							<li>XML processors <em>must</em> support UTF-8 and UTF-16</li>
							<li>XML processors <em>may</em> support any number of additional encodings</li>
						</ul>
						<li>How is the encoding <q>encoded</q>?</li>
						<ul>
							<li>part of the XML document: <code>&lt;?xml version="1.0" encoding="UTF-8"?></code></li>
							<li>bootstrap problem solved heuristically or by out-of-band information</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Uniform Resource Identifier (URI)</title>
				<slide>
					<title>Identifiers are Essential</title>
					<ul>
						<li><em>Uniform Resource Locator (URL)</em> is the old concept</li>
						<ul>
							<li>introduced to distinguish between <em>locating</em> and <em>naming</em></li>
							<li><em>locating</em> and <em>naming</em> are two ways of <em>identification</em></li>
							<li>URLs have been replaced by URIs; technically, URLs do not exist anymore</li>
						</ul>
						<li>URIs identify resources</li>
						<ul>
							<li>some resources may be retrieved using a protocol: <code href="">http://dret.net/netdret/</code></li>
							<li>not all resource access is retrieval: <code href="mailto:dret@berkeley.edu">mailto:dret@berkeley.edu</code></li>
							<li>sometimes computers are not required: <code href="tel:+1-510-6432253">tel:+1-510-6432253</code></li>
							<li>or resources cannot be located: <code href="urn:ietf:rfc:2648">urn:ietf:rfc:2648</code></li>
							<li>or location is the only means of identification: <code href="http://maps.google.com/maps?hl=en&amp;ie=UTF8&amp;om=1&amp;ll=27.988262,86.925277&amp;t=k">geo:27.988056;86.925278</code></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>URIs and REST</title>
					<ul>
						<li><em>Representational State Transfer (REST)</em> requires identification</li>
						<ol>
							<li>identify all relevant resources</li>
							<li>design/use representations for those resources</li>
							<li>resources should be linked (via URI) for allowing navigation</li>
						</ol>
						<li>URIs are the <q>API</q> of RESTful applications</li>
						<ul>
							<li>URI-identified resources have a uniform interface (HTTP)</li>
							<li>interaction with those resources is done via HTTP</li>
							<li>clients can navigate the resource/state space by following links</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>XML</title>
			<slide>
				<title>XML Use Cases</title>
				<ul>
					<li>XML is a metalanguage supporting application-specific vocabularies</li>
					<li><em>RSS</em> (and <em>Atom</em>) are XML vocabularies for newsfeeds</li>
					<ul>
						<li><a href="http://docordie.blogspot.com/">Doc or Die</a>: <a href="http://docordie.blogspot.com/rss.xml">RSS feed</a> vs. <a href="http://docordie.blogspot.com/atom.xml">Atom feed</a></li>
						<li>browsers now incorporate and/or integrate newsfeed readers</li>
					</ul>
					<li><em>OpenDocument (ODF)</em> is a language for office application documents</li>
					<ul>
						<li>designed for open and interoperable exchange</li>
						<li>standardized by ISO (which now also standardizes Microsoft's <em>Open XML</em>)</li>
					</ul>
					<li><em>Scalable Vector Graphics (SVG)</em> for portable vector graphics</li>
					<ul>
						<li>designed for embedding in Web pages</li>
						<li>good example for compound documents: <a href="http://www.carto.net/papers/svg/animated_weather_symbols/">HTML containing SVG</a></li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>XML Documents</title>
				<slide>
					<title>Markup?</title>
					<ul>
						<li>Structures are encoded using special characters</li>
						<ul>
							<li>a fundamental difference when comparing to binary formats</li>
							<li>markup languages can be read and modified using text-based tools</li>
							<li>programs must treat markup characters in a special way</li>
						</ul>
						<li>Documents are content interspersed with markup (i.e., structures)</li>
						<ul>
							<li>XML-aware software interprets the markup</li>
							<li>XML-unaware software just sees a text file</li>
							<li>modifications must be made XML-aware (e.g., inserting <q>AT&amp;T</q> as <q>AT&amp;amp;T</q>)</li>
						</ul>
						<li>You have to pay the <link href="markup-price"/></li>
					</ul>
				</slide>
				<slide>
					<title>Basic Concepts</title>
					<ul>
						<li>XML Documents have an <em>XML declaration</em> (optional)</li>
						<li>There is exactly one <em>document element</em> (a.k.a. <em>root element</em>)</li>
						<li>Elements may be nested (there is no conceptual limit)</li>
						<ul>
							<li>elements may be repeated (they can be identified by position)</li>
						</ul>
						<li>Elements are marked up using <em>tags</em></li>
						<ul>
							<li>most elements have content, surrounded by <em>start</em> and <em>end tags</em></li>
							<li>empty elements are allowed and may use a special notation</li>
						</ul>
						<li>Elements may have attributes (zero to any number)</li>
						<ul>
							<li>attributes can only occur once on an element (i.e., they cannot be repeated)</li>
						</ul>
					</ul>
					<listing src="my-first.xml"/>
				</slide>
				<slide id="xmltree">
					<title>Tree Syntax</title>
					<ul>
						<li>Markup is important, but only a notation</li>
						<li>XML documents are trees with different node types</li>
						<ul>
							<li>nodes so far: document, element, attribute, text</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree.png" title="XML document tree"/>
					</ul>
				</slide>
				<slide id="xmlelements">
					<title>Elements</title>
					<ul>
						<li>Elements can use a <a href="http://www.w3.org/TR/xml/#NT-Name">wide variety of names</a></li>
						<ul>
							<li>Allowed: <elem>html</elem>, <elem>id9832798472</elem>, <elem>_</elem>, <elem>:</elem>, <elem>こんにちは</elem></li>
							<li>Disallowed: leading numbers, spaces, control characters</li>
						</ul>
						<li>Element names usually convey some information about the content</li>
						<ul>
							<li>this is not reliable and highly language-dependent</li>
							<li>it is <em>very useful</em> when working with a known vocabulary</li>
							<li>it is <em>potentially harmful</em> when working with an unknown vocabulary</li>
						</ul>
						<li>Elements are the foundation for XML's versatility</li>
						<ul>
							<li>they can be nested (<code>&lt;address>&lt;city>Berkeley&lt;/city>&lt;zip>94709&lt;/zip>…</code>)</li>
							<li>they can be repeated (<code>&lt;givenname>Erik&lt;/givenname>&lt;givenname>Thomas&lt;/givenname></code>)</li>
							<li>their sequence can convey additional information (given names have a sequence)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Attributes</title>
					<ul>
						<li>Additional information pertaining to elements</li>
						<li>Traditionally, anything that is not considered <q>content</q></li>
						<ul>
							<li>SGML is a document markup language</li>
							<li>XML uses SGML's concepts</li>
							<li>XML has its roots in the document world</li>
						</ul>
						<li>Elements: Content (i.e., Data); Attributes: Metadata</li>
						<li>Documents often distinguish by what is textual content</li>
					</ul>
					<listing src="section.xml" line="12-20"/>
				</slide>
				<slide>
					<title>Attribute Syntax</title>
					<ul>
						<li>Naming rules are the same as for <link href="xmlelements"/></li>
						<li>Attributes always appear within an element's <em>start tag</em></li>
						<li>Attributes are <a href="http://www.w3.org/TR/xml/#NT-Attribute">name/value-pairs</a></li>
						<ul>
							<li>the value is enclosed in single or double quotes</li>
						</ul>
						<li>Attribute with a single-quote value: <elem>elem attr="Single: '"/</elem></li>
						<li>Attribute with a double-quote value: <elem>elem attr='Double :"'/</elem></li>
						<li>How can attribute values contain both?</li>
					</ul>
				</slide>
				<slide id="markup-price">
					<title>The Price for Markup</title>
					<ul>
						<li>Markup characters have a special meaning</li>
						<ul>
							<li><q>&lt;</q> opens a tag</li>
							<li>for attribute values, quotes delimit the value</li>
						</ul>
						<li>The literal use of a markup character requires escaping</li>
						<ul>
							<li>XML's <em>entities</em> can refer to pieces of content</li>
							<li>entity syntax is <code>&amp;name;</code> for referring to the entity <q><code>name</code></q></li>
							<li>XML has 5 <a href="http://www.w3.org/TR/xml/#sec-predefined-ent">predefined entities</a>: <code>&amp;lt;</code>, <code>&amp;gt;</code>, <code>&amp;amp;</code>, <code>&amp;apos;</code>, <code>&amp;quot;</code></li>
						</ul>
						<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>
					</ul>
					<pre><![CDATA[<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>]]></pre>
				</slide>
				<slide id="mixed-content">
					<title>Mixed Content</title>
					<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smiley.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>
					<pre><![CDATA[<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smiley.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>]]></pre>
					<img style="width : 90% ; margin : 4% ;" src="mixed-content.png" title="XML tree for mixed content"/>
				</slide>
				<slide>
					<title>Mixed Content Usage</title>
					<ul>
						<li>Database people find mixed content irritating</li>
						<ul>
							<li>cannot be easily mapped to relational structures</li>
							<li>is more <em>document-like</em> than <em>data-like</em></li>
							<li>much harder to optimize for query analysis and query processing</li>
						</ul>
						<li>Document people find mixed content very intriguing</li>
						<ul>
							<li>textual content can still be used as simple text</li>
							<li>markup provides additional information for rich text</li>
							<li>start with a text-only document and use markup to add structure to it</li>
						</ul>
					</ul>
				</slide>
				<slide id="whitespace">
					<title>Whitespace</title>
					<ul>
						<li>XML documents often are pretty-printed</li>
						<li><em>Whitespace text nodes</em> often are <q>not really content</q></li>
						<ul>
							<li>XML whitespace characters are <em>space</em>, <em>tab</em>, <em>newline</em>, and <em>carriage return</em></li>
							<li>whitespace text nodes are text nodes containing <em>only</em> whitespace characters</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree-whitespace.png" title="XML tree with whitespace text nodes"/>
					</ul>
				</slide>
				<slide>
					<title>Significant Whitespace</title>
					<ul>
						<li>Some whitespace text nodes are relevant</li>
						<li>Usually text nodes in <em>mixed content</em> elements</li>
					</ul>
					<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>
					<pre><![CDATA[<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>]]></pre>
					<img style="height : 40% ; margin : 2% ;" src="significant-whitespace.png" title="XML tree containing significant whitespace"/>
				</slide>
			</part>
			<part id="wellformed">
				<title>Processing XML</title>
				<slide>
					<title>Observing XML Syntax</title>
					<ul>
						<li>XML's syntax requires you to use the right characters</li>
						<ul>
							<li><a href="http://www.w3.org/TR/xml/#NT-element">the grammar alone</a> allows many XML errors</li>
							<li><a href="http://www.w3.org/TR/xml/#GIMatch">additional constraints</a> ensure that everything is used correctly</li>
						</ul>
						<li><em>XML processors</em> (a.k.a. <em>XML parsers</em>) check for these rules</li>
						<ul>
							<li>if there are problems, the document cannot be interpreted as XML</li>
							<li>otherwise, the document is said to be <em>well-formed</em></li>
						</ul>
						<li>Only well-formed documents can be regarded as a tree</li>
						<ul>
							<li>other documents are not XML at all, even though they may be close</li>
							<li>XML processors must report problems to the application (no <em>silent recovery</em>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Validity</title>
					<ul>
						<li><em>Well-formed documents</em> observe XML rules</li>
						<ul>
							<li>they observe the XML syntax</li>
							<li>they observe all well-formedness constraints</li>
						</ul>
						<li>Applications require the right elements and attributes</li>
						<li><em>Validity</em> is a more comprehensive concept</li>
						<li><em>Valid documents</em> observe additional rules</li>
						<ul>
							<li>they must be well-formed documents</li>
							<li>they must adhere to the constraints defined in a <a href="http://dret.net/lectures/xml-fall09/dtd">Document Type Definition (DTD)</a></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Semantics</title>
					<ul>
						<li>XML is a language for encoding trees</li>
						<ul>
							<li>Elements and attributes are labeled nodes in this tree</li>
							<li>the labels can be chosen freely by document authors</li>
						</ul>
						<li>The tree's meaning is nothing XML is concerned with</li>
						<ul>
							<li>peers must have a mutual understanding of the semantics</li>
							<li>XML without mutual understanding is almost useless</li>
							<li>reverse engineering often is possible, but it is risky and brittle</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML Documents</title>
				<ul>
					<li>XML documents are structured data using markup</li>
					<li>Elements and Attributes are the main structuring mechanisms</li>
					<li>Elements and Attributes have names, but have no inherent semantics</li>
					<li>For using XML successfully, <em>shared semantics</em> are essential</li>
					<li>Always think about semantics</li>
				</ul>
			</slide>
		</part>		
	</presentation>
	<presentation id="docdatadb">
		<title>Documents, Data, and Databases</title>
		<date>2009-10-01</date>
		<toc class="resources"><a href="http://www.rpbourret.com/xml/XMLAndDatabases.htm" title="Ronald Bourret's XML and Databases FAQ">FAQ</a></toc>
		<toc class="abstract">XML databases often are a good solution for managing document-oriented content, but frequently it is necessary or dictated by existing solutions to use non-XML databases for managing document content. In most cases, these databases will be relational databases. There are two major approaches of how to manage document-oriented content in a relational database. The first approach is to define a mapping between document and relational structures and work with this mapping. The second approach is to use the XML-specific functionality, which is increasingly provided by relational databases, turning them into <em>XML-aware databases</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
 		<slide>
			<title>Documents are Trees</title>
			<ul>
				<li>Many documents are trees or <q>almost trees</q></li>
				<ul>
					<li>applications may have different internal data models</li>
					<li>the exchange and processing of documents often is tree-based</li>
				</ul>
				<li><link href="xml">XML</link> is the most popular format for tree structures</li>
				<li>Where and how is XML being used?</li>
				<ul>
					<li>as a transfer syntax (Web Services often are used like this)</li>
					<li>as artifacts that have a longer lifespan (archiving of business documents)</li>
					<li>as the application data model (there is nothing but XML)</li>
				</ul>
				<li>XML usage results in very different requirements for XML tools</li>
				<ul>
					<li>Web Service programmers often never see the tree</li>
					<li>archived XML documents need to be searchable</li>
					<li>XML-centric applications need to store XML efficiently</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Document Content vs. Document Metadata</title>
			<table width="90%" style="margin : 2%">
				<tr>
					<td valign="top" align="left">
						<ul>
							<li>Document content</li>
							<ul>
								<li>title and abstract</li>
								<li>chapters and sections</li>
								<li>tables, lists, figures</li>
								<li>footnotes, endnotes, sidenotes</li>
								<li>bibliographic references</li>
								<li>annotations</li>
								<li>cross-references</li>
							</ul>
						</ul>
					</td>
					<td valign="top" align="left">
						<ul>
							<li>Document metadata</li>
							<ul>
								<li>title</li>
								<li>creation date</li>
								<li>author</li>
								<li>affiliation (document and/or author)</li>
								<li>version/revision information</li>
								<li>keywords</li>
								<li>summary and/or abstract</li>
							</ul>
						</ul>
					</td>
				</tr>
			</table>
		</slide>
		<slide>
			<title>Metadata Metamodels</title>
			<ul>
				<li>Metadata is often perceived to be key/value data</li>
				<ul>
					<li>long history of catalog cards and their management</li>
					<li>complex metadata is hard to create, maintain, and use</li>
				</ul>
				<li>Metadata management works well in non-tree models</li>
				<ul>
					<li>relational metadata works well for regular metadata</li>
					<li>graph metadata (RDF) works well for any metadata</li>
				</ul>
				<li>Many <em>document management systems</em> manage metadata and not documents</li>
				<ul>
					<li>manage all the document metadata in RDBMS (or RDF)</li>
					<li>treat the documents as BLOBs (PDF or some other opaque format)</li>
					<li>makes it hard to work with document contents and structure</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Storing XML</title>
			<ul>
				<li>XML documents are text files</li>
				<ul>
					<li>they can be stored in file systems (they are <q>self-describing</q>)</li>
					<li>they can be retrieved by searching through the file system</li>
				</ul>
				<li>File systems are not designed to store millions of documents</li>
				<ul>
					<li>standard file system implementations usually slow down dramatically</li>
					<li>standard procedures (backup/versioning/concurrency) do not work well</li>
				</ul>
				<li>Problems with <q>File Systems as XML Databases</q></li>
				<ul>
					<li>the number of documents is too large</li>
					<li>there is no structured access</li>
					<li>there is no access optimization</li>
				</ul>
			</ul>
		</slide>
		<part id="rdbms">
			<title>Relational Databases</title>
			<slide>
				<title>Generic XML Storage</title>
				<ul>
					<li>Relational databases have been the state of the art since 1976</li>
					<ul>
						<li>this is long enough to build highly optimized and robust systems</li>
						<li>this is long enough to have ER hard-wired into some brains</li>
					</ul>
					<li>XML is more powerful than ER</li>
					<ul>
						<li>repetitions of elements do not map well</li>
						<li>choices do not map well</li>
						<li>ordered content does not map well</li>
						<li>mixed content does not map well</li>
					</ul>
					<li>Storing XML in a relational database is hard</li>
					<ul>
						<li>it can be done by piggybacking structural information as content</li>
						<li>using the resulting structures is awkward and very inefficient</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Tree Table</title>
				<table width="90%" style="margin : 4%">
					<tr>
						<td valign="middle" align="center">
							<img style="width : 90% ; margin : 2% ; " src="tree-table.png"/>
						</td>
						<td valign="middle" align="center">
							<table style="width : 90% ; " border="1">
								<tr>
									<th>ID</th>
									<th>Type</th>
									<th>Name</th>
									<th>Value</th>
									<th>Parent</th>
									<th>Left</th>
								</tr>
								<tr>
									<td>1</td>
									<td>Root</td>
									<td></td>
									<td></td>
									<td></td>
									<td></td>
								</tr>
								<tr>
									<td>2</td>
									<td>Element</td>
									<td>a</td>
									<td></td>
									<td>1</td>
									<td></td>
								</tr>
								<tr>
									<td>3</td>
									<td>Element</td>
									<td>b</td>
									<td></td>
									<td>2</td>
									<td></td>
								</tr>
								<tr>
									<td>4</td>
									<td>Element</td>
									<td>c</td>
									<td></td>
									<td>2</td>
									<td>3</td>
								</tr>
								<tr>
									<td>5</td>
									<td>Text</td>
									<td></td>
									<td><q>Text</q></td>
									<td>3</td>
									<td></td>
								</tr>
								<tr>
									<td>6</td>
									<td>Attribute</td>
									<td>att</td>
									<td><q>42</q></td>
									<td>4</td>
									<td></td>
								</tr>
							</table>
						</td>
					</tr>
				</table>
			</slide>
		</part>
		<part>
			<title>Database Support for XML</title>
			<slide>
				<title>Why XML and Databases?</title>
				<ul>
					<li>XML is becoming increasingly popular</li>
					<ul>
						<li>XML as a document format was first used as <em>wire format</em></li>
					</ul>
					<li>What is XML for an application?</li>
					<ul>
						<li>an (increasingly popular) way to represent the data?</li>
						<li>the data itself?</li>
						<li>currently, the representation perspective is more popular</li>
						<li>as XML is increasingly penetrating applications, this may change</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML Interchange</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-dbms-application.png"/>
			</slide>
			<slide id="xdbms-dbms-xmlsupport">
				<title>XML Support in DBMS</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-dbms-xmlsupport.png"/>
			</slide>
			<slide>
				<title>XML DBMS</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-dbms-xdbms.png"/>
			</slide>
		</part>
		<part>
			<title>XML Storage in Databases</title>
			<slide>
				<title>Model Mapping</title>
				<ul>
					<li>Relational databases are not good tools for storing XML</li>
					<ul>
						<li>they might be appropriate if the schema disallows problematic constructs</li>
						<li>they often are already deployed and applications must live with them</li>
					</ul>
					<li>If the data model is ER-oriented, relational databases are good tools</li>
					<ul>
						<li>metadata typically is a good fit for tables</li>
					</ul>
					<li>If the XML is not visible in the model, it can be structurally inaccessible</li>
					<ul>
						<li>e.g., a product catalog may contain product descriptions in XHTML rich text snippets</li>
						<li>for managing the product catalog data, the XHTML is not relevant</li>
					</ul>
					<li>If the XML is part of the model, it should be accessible structurally</li>
					<ul>
						<li>if the product catalog XHTML contains links to other products, these links are important</li>
						<li>if they are hidden in the XHTML, all XHTML snippets have to be parsed</li>
						<li>ideally, the database should be able to <q>query the XHTML snippet</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML is Text</title>
				<ul>
					<li>XML documents can be stored as text</li>
					<ul>
						<li>databases typically have various datatypes for text storage</li>
						<li>if the database supports Unicode, any XML document can be stored</li>
					</ul>
					<li>The XML structure is completely invisible to the database</li>
					<ul>
						<li>working with the XML requires querying and parsing the XML text</li>
						<li>this kind of storage does not allow any querying of the XML content</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML → ∗LOB</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-lob.png"/>
			</slide>
			<slide id="xdbms-xmldatatype">
				<title>XML as a Datatype</title>
				<ul>
					<li>SQL supports a wide variety of datatypes</li>
					<ul>
						<li>typed values are better than untyped values (they enable type-specific operations)</li>
						<li>XML can be regarded as just another data type</li>
					</ul>
					<li>Introducing a datatype lets the database recognize the data</li>
					<ul>
						<li>XML data can be stored in some format (a <q>persistent DOM</q>)</li>
						<li>databases can provide functionality avoiding parsing/serialization (DOM-based)</li>
					</ul>
				</ul>
			</slide>
			<slide id="xdbms-xmltype">
				<title>XML Datatype</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-datatype.png"/>
			</slide>
			<slide>
				<title>Mapping XML to Models</title>
				<ul>
					<li>Model-relevant data must be mapped to the database structures</li>
					<ul>
						<li>this assumes there is an ER-model which describes the database structure</li>
						<li>mapping XML is easy by definition because the XML is ER-compliant</li>
					</ul>
					<li>Is the data accessed as table data?</li>
					<ul>
						<li>if shredded data is only used to assemble it again, it is just performance overhead</li>
						<li>if shredded data is accessed relationally, then shredding makes sense</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Shredding (XML → Columns)</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-shredding.png"/>
			</slide>
			<slide>
				<title>XML as First-Class Citizen</title>
				<ul>
					<li>The <link href="xdbms-xmltype"/> defines XML as a sub-concept of ER</li>
					<ul>
						<li>the overall structure of the database is relational</li>
						<li>attributes may be of type XML, which means storing trees in tables</li>
					</ul>
					<li>Tables are not the only way to see the world</li>
					<ul>
						<li>XML trees are an <em>alternative</em> to tables, not a <em>datatype</em></li>
						<li>XML-centric applications should not be forced to use tables at all</li>
					</ul>
					<li>XML can be regarded as replacing the ER-concept altogether</li>
					<ul>
						<li>the database simply stores XML documents</li>
						<li>applications can store, query, update, and manage XML documents in the database</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML DBMS</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-xdbms.png"/>
			</slide>
		</part>
		<part>
			<title>XML in Relational Databases</title>
			<slide>
				<title>RDBish XML</title>
				<ul>
					<li>XML schemas can be designed with databases in mind</li>
					<ul>
						<li>avoid unbounded repetitions of elements</li>
						<li>avoid choices</li>
						<li>avoid ordered content</li>
						<li>avoid mixed content</li>
						<li>avoid recursion</li>
					</ul>
					<li>Many XML schemas are designed RDBish for compatibility reasons</li>
					<ul>
						<li>it was decided that the XML should allow easy mapping to tables</li>
						<li>the person designing the schema <q>thinks in ER</q></li>
						<li>the schema has been generated from a relational database schema</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Problematic XML</title>
				<ul>
					<li>XML in its full glory is too much for tables</li>
					<ul>
						<li>XML has been developed as a document format</li>
						<li>XML is about hierarchies (which <em>intentionally</em> have been left out of ER)</li>
						<li>XML is about highly irregular structures</li>
					</ul>
					<li>XML often is said to have two <q>flavors</q></li>
					<ul>
						<li><em>data-oriented XML</em>: regular data which can be easily mapped to tables</li>
						<li><em>document-oriented XML</em>: irregular structures which are hard to map to tables</li>
						<li>real-world XML often is a bit of both (e.g., <em>content</em> and <em>metadata</em>)</li>
					</ul>
					<li>Hybrid approaches sometimes are a good solution</li>
					<ul>
						<li>data-oriented parts can be shredded and stored in tables</li>
						<li>the document-oriented rest is stored as one object (text or <code>XML</code>)</li>
					</ul>
				</ul>
			</slide>
			<part id="sqlxml">
				<title>SQL/XML</title>
				<slide>
					<title>SQL/XML:2003</title>
					<ul>
						<li>SQL/XML provides <link href="xdbms-dbms-xmlsupport"/>s</li>
						<ul>
							<li>it introduces <link href="xdbms-xmldatatype"/></li>
							<li>it introduces a number of operations for generating XML from query results</li>
							<li>it defines mappings to bridge both worlds (SQL and XML)</li>
						</ul>
						<li>SQL/XML does not change anything about the database model</li>
						<ul>
							<li>data is still stored in tables only</li>
							<li>a column of a table may use the <code>XML</code> type</li>
							<li>queries may return results in XML rather than as SQL result sets</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>SQL/XML Example</title>
					<pre>SELECT
  e.EmpId,
  e.FirstName,
  e.LastName,
  e.StartDate,
  e.EndDate
FROM Employees e WHERE e.EmpId = 12</pre>
				<pre>SELECT
  XMLELEMENT (NAME "employee",
    XMLATTRIBUTES(e.EmpId as "id"),
    XMLELEMENT(NAME "names",
    XMLELEMENT(NAME "first", e.FirstName),
    XMLELEMENT(NAME "last", e.LastName)),
    XMLELEMENT(NAME "hire-dates",
      XMLATTRIBUTES(e.StartDate as "start", e.EndDate as "end")))
FROM Employees e WHERE e.EmpId = 12</pre>
				</slide>
				<slide>
					<title>SQL/XML:2007</title>
					<ul>
						<li>Adds the concept of <em>XML Tables</em></li>
						<li>XML Tables are not tables, they are containers for XML</li>
						<li>SQL/XML:2007 changes the database's data model</li>
						<ul>
							<li>it is now possible to have a database with <q>no tables</q></li>
							<li>likely use cases are to have both: traditional and XML tables</li>
						</ul>
						<li>SQL/XML:2007 defines a hybrid database: relational and XML database</li>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Tables and Trees don't Mix</title>
				<ul>
					<li>Tables and trees are different metamodels</li>
					<li>Different technologies are used to handle these different models</li>
					<li>Think before choosing the wrong tool</li>
				</ul>
			</slide>
			<slide>
				<title>Database Technologies do Mix</title>
				<ul>
					<li>Relational databases are good tools for regular data</li>
					<li>XML databases are good tools for documents</li>
					<li>SQL/XML:2007 defines a database that supports both</li>
					<li>Applications can choose the best mix of tables and trees</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="syndication">
        <title short="Syndication">Content Syndication</title>
        <date>2009-10-06</date>
        <toc class="reading"><a href="http://www.xml.com/pub/a/2004/08/18/pilgrim.html">Identifying Atom</a></toc>
        <toc class="resources"><a href="http://atompub.org/rfc4287.html" title="Atom RFC">Atom</a>&#160;· <a href="http://atompub.org/rfc5032.html" title="Atom Publishing Protocol (AtomPub) RFC">AtomPub</a>&#160;· <a href="http://validator.w3.org/feed/" title="W3C RSS/Atom Feed Validator">Validator</a></toc>
        <toc class="abstract">For many information sources on the Web, it is useful to have some standardized way of subscribing to information updates. Syndication formats such as <em>RSS</em> and <em>Atom</em> can be used by these information sources to publish a <em>feed</em> of updated information items. While RSS and Atom are read-only formats, the <em>Atom Publishing Protocol (AtomPub)</em> builds on top of Atom and provides a protocol for submitting new items to feeds.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Content Feeds</title>
			<ul>
				<li>Early Web content was static or updated very infrequently</li>
				<ul>
					<li>there was not yet the requirement to reuse content in different contexts</li>
				</ul>
				<li>Frequently updated Web content quickly became a very common scenario</li>
				<ul>
					<li>as commercial interests took over the Web, users should have a reason to re-visit a site</li>
					<li>presenting a steady stream of new content creates the image of a live Web site</li>
				</ul>
				<li>There are two major use cases where HTML is not sufficient</li>
				<ol>
					<li>users want an efficient way to get the updated content from a site</li>
					<li>sites want to aggregate updated content from other sites and re-publish it</li>
				</ol>
				<li><link href="syndication-formats"/> are designed to support these two use cases</li>
				<ul>
					<li>container formats for updated items</li>
					<li>a small amount of metadata about these items for automated processing</li>
				</ul>
			</ul>
		</slide>
		<part id="syndication-formats">
			<title>Syndication Formats</title>
			<part id="rss">
				<title>RSS</title>
				<slide id="rss-versions">
					<title>RSS History</title>
					<ul>
						<li><q><a href="http://diveintomark.org/archives/2004/02/04/incompatible-rss">The Myth of RSS Compatibility</a></q> provides a good overview</li>
						<li>RSS is a schoolbook example for <q>why standards are a good thing</q></li>
						<ul>
							<li><link href="rss09"/> was created for the <em>My Netscape</em> portal in March 1999</li>
							<li>RSS 0.91 (a simplification) was introduced in July 1999 (as an interim solution)</li>
							<li>the AOL/Netscape merger removed the format from the company's portal</li>
							<li>RSS was without an owner, and different parties claimed/denied ownership</li>
							<li><link href="rss10"/> was created by an informal developer group</li>
							<li>RSS 0.92 (and 0.93 and 0.94) were published without acknowledging RSS 1.0</li>
							<li>finally, <link href="rss20"/> was released as a follow-up to the RSS 0.9x versions</li>
						</ul>
						<li>Using RSS has become an exercise in managing a menagerie of versions</li>
					</ul>
				</slide>
				<slide id="rss09">
					<title>RSS 0.9</title>
					<ul>
						<li>RSS means <em>RDF Site Summary</em> (or <em>Rich Site Summary</em>?)</li>
						<ul>
							<li>based on an RDF draft and not compatible with the final RDF specification</li>
							<li>RDF was considered too cumbersome and unstable</li>
							<li>0.90 (proto-RDF) was quickly replaced by the non-RDF 0.91 version</li>
						</ul>
						<li>RSS 0.92+ versions were developed as unilateral specifications</li>
						<ul>
							<li>starting with RSS 0.91, RSS means <em>Rich Site Summary</em></li>
							<li>it is no longer built on RDF, instead it simply uses XML</li>
							<li>the 0.9x branch eventually was renamed to <link href="rss20"/></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>RSS 0.91 Example</title>
					<listing src="rss091.xml" line="2-12" href="http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html"/>
				</slide>
				<slide id="rss10">
					<title>RSS 1.0</title>
					<ul>
						<li>RSS means <em>RDF Site Summary</em> (this time for real)</li>
						<ul>
							<li>based on the final RDF specification and thus incompatible with any <link href="rss09"/></li>
							<li>developed when the <link href="semweb">Semantic Web</link> and <link href="rdf">RDF</link> were first heavily marketed (<a href="http://dret.net/biblio/reference/lee99" title='Tim Berners-Lee, Mark Fischetti, Michael Dertouzos, "Weaving the Web", HarperCollins, 1999'>1999</a>)</li>
							<li>RDF was expected to become the format for metadata on the Web</li>
						</ul>
						<li>RSS 1.0 makes heavy use of XML Namespaces</li>
						<li>RSS 1.0 introduces features which were not present in 0.91</li>
						<ul>
							<li>date information for published items (very relevant for news feeds)</li>
							<li>individual authors for various items in a feed</li>
						</ul>
						<li>RSS 1.0 is the latest version of RDF-based RSS</li>
						<ul>
							<li>the <link href="semweb">Semantic Web</link> wave is not over yet, but <link href="rdf">RDF</link> has lost its novelty appeal</li>
							<li>for a more XML-oriented encoding, <link href="rss09"/> provides a better foundation</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>RSS 1.0 Example</title>
					<listing src="rss10.xml" line="2-22" href="http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html"/>
				</slide>
				<slide id="rss20">
					<title>RSS 2.0</title>
					<ul>
						<li>RSS now means <em>Really Simple Syndication</em></li>
						<ul>
							<li>RSS 2.0 is the continuation of the 0.91 branch (which dropped RDF)</li>
							<li>together with <link href="rss10"/> it is the most popular version of RSS</li>
							<li>migration from 0.91 to 2.0 is easily possible</li>
						</ul>
						<li>RSS 2.0 tries to avoid the use of XML Namespaces</li>
						<li>RSS 2.0 is <a href="http://rss-extensions.org/wiki/Main_Page">increasingly used with extensions</a> for vendor-specific information</li>
						<ul>
							<li>the RSS core is minimal, so many applications need extensions</li>
							<li>many extensions have overlapping functionality</li>
							<li>most extensions have unclear semantics and unclear versioning policies</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>RSS 2.0 Example</title>
					<listing src="rss20.xml" line="2-14" href="http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html"/>
				</slide>
				<slide>
					<title>The Case for Content Management</title>
					<ul>
						<li>RSS is very rarely produced by hand</li>
						<ul>
							<li>by definition, RSS contains redundant information for a specific purpose</li>
						</ul>
						<li>If a <link href="cms"/> is used, RSS can be generated</li>
						<ul>
							<li>basic metadata can be generated by the CMS (title, author, date)</li>
							<li>better tagging of content results in better tagging of feeds</li>
							<li>well-tagged feeds are better foundations for large-scale reuse of feed items</li>
						</ul>
						<li>Blogging is simply a specialized case of a CMS</li>
						<ul>
							<li>Web-based interface for controlling everything</li>
							<li>strictly time-ordered sequence of published items</li>
							<li>navigation features primarily based on the time-specific facets of the blog (maybe tags)</li>
							<li>all blogging tools include feed support</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Consuming RSS</title>
					<ul>
						<li>RSS feeds often have quality problems</li>
						<ul>
							<li>surprisingly often feeds do not even deliver well-formed XML</li>
							<li>the use of embedded markup in RSS is not well-defined</li>
						</ul>
						<li>Writing an RSS reader from scratch is not a good idea</li>
						<li>There are three major tasks which RSS readers must do</li>
						<ol>
							<li>accept non-XML RSS feeds and fix them to be XML</li>
							<li>look at the feed contents and bring them into a unified form</li>
							<li>produce a unified view of feeds regardless of the RSS version</li>
						</ol>
					</ul>
				</slide>
				<slide>
					<title>RSS Technical Problems</title>
					<ul>
						<li>What to put into an item's description</li>
						<ul>
							<li>the fundamental question is whether a description is text or HTML</li>
							<li>if there is no well-defined way, then interpretation is client-specific</li>
							<pre>&lt;description>This is a &lt;em>very important&lt;/em> blog post …</pre>
							<pre>&lt;description>This is a &amp;lt;em>very important&amp;lt;/em> blog post …</pre>
							<pre>&lt;description>This is a blog post about &lt;em> in RSS feeds …</pre>
							<pre>&lt;description>This is a blog post about &amp;lt;em> in RSS feeds …</pre>
							<pre>&lt;description>This is a blog post about &amp;amp;lt;em> in RSS feeds …</pre>
						</ul>
						<li>Underspecified and not very robust in various other areas</li>
						<ul>
							<li>broken RSS is accepted by most readers (but fixing it can change the interpretation)</li>
							<li>the interpretation of relative URIs is not mentioned in the specifications</li>
							<li>some minimal semantics (classification) for items would be very useful</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>RSS Political Problems</title>
					<ul>
						<li>Multiple and incompatible <link href="rss-versions"/> are still in widespread use</li>
						<ul>
							<li><link href="rss10"/> and <link href="rss20"/> are <q>incompatible by design</q> (RDF vs. non-RDF)</li>
							<li>none of the RSS versions is maintained by a universally accepted standards body</li>
						</ul>
						<li>None of the specifications is being updated or fixed</li>
						<ul>
							<li>some of the lessons learned by RSS deployment are not used in a new version</li>
							<li>it is unlikely that a new version will be produced which merges the RSS landscape</li>
						</ul>
						<li>Invent something new instead of trying to fix RSS</li>
						<ul>
							<li><link href="atom"/> started in 2003 (called <em>Echo</em> at first)</li>
							<li>W3C or IETF would have been promising candidates for a <q>new RSS</q></li>
							<li>W3C is more formal, IETF is more developer-centered</li>
							<li><a href="http://www.bestkungfu.com/?p=492">IETF was chosen over W3C</a> because of the Atom community's preferences</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="atom">
				<title>Atom</title>
				<slide>
					<title>Atom History</title>
					<img src="atom-logo.png" href="http://atompub.org/" style="float : right ; width : 20% ; margin-top : 0.5em ; margin-right : 2em ; "/>
					<ul>
						<li>RSS's shortcomings were very apparent and could not be fixed</li>
						<li>In mid-2003, discussions started about an improved format</li>
						<li>It also became apparent that the format should have a protocol</li>
						<li>Atom 0.3 was released in December 2003 but had no formal home</li>
						<li>IETF was chosen as the new home with a working group in June 2004</li>
						<li><a href="http://dret.net/rfc-index/reference/RFC4287">RFC 4287</a> was published in December 2005</li>
						<li><link href="atompub">AtomPub</link> has been published as <a href="http://dret.net/rfc-index/reference/RFC5023">RFC 5023</a> in October 2007</li>
					</ul>
				</slide>
				<slide>
					<title>Atom vs. RSS</title>
					<ul>
						<li>Standardized by the IETF (well-defined process)</li>
						<li>Classification of entries (user-defined categories)</li>
						<li>More XML-like markup design (more nesting)</li>
						<li>Namespaces are used and supported as standard mechanism</li>
						<li>Atom feeds <em>must</em> be well-formed XML (there even <a href="http://atompub.org/2005/08/17/atom.rnc" title="Atom RELAX NG Schema">is a schema</a>)</li>
						<li>Interpretation of content is well-defined (various content types)</li>
						<li>Support for <code>xml:lang</code> and <code>xml:base</code></li>
					</ul>
				</slide>
				<slide>
					<title>Atom Example</title>
					<listing src="atom.xml"/>
				</slide>
				<slide>
					<title>Atom Content</title>
					<ul>
						<li>RSS had no safe way of finding out what an entry's content is</li>
						<ul>
							<li>this led to different implementations being <q>smart</q> about what the RSS author really wanted</li>
							<li>one of Atom's main goals was to improve this in a well-defined way</li>
							<li>Atom allows escaped markup (the only way to include non-XML HTML in an XML format)</li>
						</ul>
						<li>Each <elem>content</elem> element should have a <atom>type</atom> (the default is <code>text</code>)</li>
						<li>Atom's content interpretation algorithm (use first applicable rule):</li>
						<ol>
							<li>if <atom>type</atom> is <code>text</code>, no child elements are allowed (plain text content)</li>
							<li>if <atom>type</atom> is <code>html</code> then RSS's method of escaped markup is used</li>
							<li>if <atom>type</atom> is <code>xhtml</code> then there must be a <elem>div</elem> containing XHTML markup</li>
							<li>if <atom>type</atom> is an XML <link href="mediatypes">media type</link> then the content should be treated as this type</li>
							<li>if <atom>type</atom> starts with <code>text/</code> then no child elements are allowed</li>
							<li>for all other values, the content must be a base64-encoded entity of the specified MIME type</li>
						</ol>
					</ul>
				</slide>
				<slide>
					<title>Atom Content Examples</title>
					<pre href="http://www.xml.com/lpt/a/1633"><![CDATA[<content type="xhtml">
  <div xmlns="http://www.w3.org/1999/xhtml">
    One <strong>bold</strong> foot forward
  </div>
</content>]]></pre>
					<pre href="http://www.xml.com/lpt/a/1633"><![CDATA[<content>The "atom:content" element either contains or links to the content of the entry. The content of atom:content is Language-Sensitive.</content>]]></pre>
					<pre href="http://www.xml.com/lpt/a/1633"><![CDATA[<content type="html">The &lt;code>atom:content&lt;/code> element either contains or links to the content of the entry. The content of &lt;code>atom:content&lt;/code> is &lt;a href="http://www.ietf.org/rfc/rfc3066.txt">Language-Sensitive&lt;/a>.</content>]]></pre>
					<pre href="http://www.xml.com/lpt/a/1633"><![CDATA[<content type="image/png">
iVBORw0KGgoA … TAAAAAElFTkSuQmCC
</content>]]></pre>
					<pre href="http://www.xml.com/lpt/a/1633"><![CDATA[<content src="image.png" type="image/png"/>]]></pre>
				</slide>
				<slide>
					<title>Atom Categories</title>
					<ul>
						<li>Atom allows categories to be assigned to entries</li>
						<ul>
							<li>each <elem>category</elem> element must have a <atom>term</atom> attribute for the category</li>
							<li>an optional <atom>scheme</atom> identifies the categorization scheme (ontology, taxonomy, …)</li>
							<li>an optional <atom>label</atom> attribute provides a human-readable label for the category</li>
						</ul>
						<li><link href="atompub">AtomPub</link> defines a document format for <link href="atom-category-documents"/></li>
						<li>Three different cases of categorization can be distinguished</li>
						<ol>
							<li>use a well-known scheme (such as <em>Dublin Core</em>)</li>
							<li>use a private but well-designed scheme (which has a URI and can be reused reliably)</li>
							<li>use tags without schemes, which then are little more than content labels</li>
						</ol>
						<li>Widely-known tags are <a href="http://www.tbray.org/ongoing/When/200x/2007/02/01/Tag-Scheme">not easy to handle</a></li>
						<ul>
							<li>they are more than just privately assigned tags</li>
							<li>there is no formal scheme for them, just an emerging consensus</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Switching from RSS to Atom</title>
					<ul>
						<li>Generate both feeds but serve RSS with an HTTP redirect (301)</li>
						<ul>
							<li>old subscribers with broken clients can still use the RSS feed</li>
							<li>old subscribers with correct clients will use the Atom feed</li>
						</ul>
						<li>Atom exposes more information than RSS (<elem>category</elem> for tags)</li>
						<ul>
							<li>the mapping of publishing info to the feed has to be changed/extended</li>
							<li>for standard metadata use Atom's built-in metadata elements</li>
							<li>for application-specific metadata consider reusing an existing metadata schema</li>
						</ul>
						<li>Atom can be used to publish snippets as well as full content</li>
						<ul>
							<li><elem>content</elem> allows any type of content to be used and may contain a complete entry</li>
							<li><elem>summary</elem> allows only text and should provide a condensed version of an entry</li>
							<li>some Atom sources publish two feeds for summaries and content</li>
						</ul>
						<li>Generate good Atom and downgrade it to RSS 1.0 &amp; 2.0</li>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Syndication Aggregation</title>
			<slide>
				<title>End-User Aggregation</title>
				<img src="feed-icon.png" style="float : right ; margin-top : 0.5em ; margin-right : 2em ; "/>
				<ul>
					<li>Users often have a small number of preferred Web sites</li>
					<ul>
						<li>news can be tracked by subscribing to their feeds</li>
						<li>this requires a feed-aware client (Firefox is not a good feed reader)</li>
					</ul>
					<li>Ajax-based feeds could be implemented as a client-side application</li>
					<ul>
						<li>retrieve the feed, transform it to HTML, and render the result</li>
						<li>but the security restriction of <code>XMLHttpRequest</code> gets in the way</li>
						<li>there is a way around this as used in the <a href="http://www-128.ibm.com/developerworks/library/x-ajaxrss/" title="IBM developerWorks Article by Jack D. Herrington">Ajax RSS reader</a></li>
						<li><a href="http://www.google.com/reader/view/">Google Reader</a> accesses Google's own copy of the feed (<link href="feedburner"/>)</li>
					</ul>
					<li>How can users find a feed?</li>
					<ul>
						<li>feeds have URIs (they are Web resources) and can be referenced from within HTML</li>
					</ul>
				</ul>
				<pre title="Feed Autodiscovery for RSS">&lt;link rel="alternate" type="application/rdf+xml" title="…" href="…" />
&lt;link rel="alternate" type="application/rss+xml" title="…" href="…" /></pre>
				<pre title="Feed Autodiscovery for Atom">&lt;link rel="alternate" type="application/atom+xml" title="…" href="…" /></pre>
			</slide>
			<slide>
				<title>Aggregation Intermediaries</title>
				<div style="float : right ; width : 20% ; margin-right : 1.5em">
					<script type="text/javascript" src="http://www.google.com/reader/ui/publisher.js"></script>
					<script type="text/javascript" src="http://www.google.com/reader/public/javascript/user/16601496766743088901/state/com.google/broadcast?n=5&amp;callback=GRC_p(%7Bc%3A'gray'%2Ct%3A'Shared%20items'%2Cs%3A'true'%7D)%3Bnew%20GRC"></script>
				</div>
				<ul>
					<li>RSS is a frequently used format between content providers</li>
					<ul>
						<li>news agencies want to distribute news items as easily as possible</li>
						<li>by using a single format, producers and consumers can more easily interoperate</li>
					</ul>
					<li>The <link href="rss-versions"/> caused publishers to look for something better</li>
					<ul>
						<li><link href="rss"/> had a head start and still is widely used</li>
						<li><link href="atom"/> is a much better format and will eventually replace RSS</li>
					</ul>
					<li>User-configured portals are the merger between both scenarios</li>
					<ol>
						<li>users select a number of feeds as their preferred information sources</li>
						<li>the feeds are presented on a <a href="http://www.google.com/reader/view/" title="Google Reader">single personalized Web page</a></li>
						<li>by <a href="http://www.google.com/reader/shared/16601496766743088901" title="dret's Google Reader Shared Items">sharing</a> and <a href="http://www.google.com/reader/public/atom/user/16601496766743088901/state/com.google/broadcast" title="dret's Google Reader Atom Feed">re-publishing</a> items, users are becoming aggregation intermediaries</li>
					</ol>
				</ul>
			</slide>
			<part id="feedburner">
				<title>FeedBurner</title>
				<slide>
					<title>Fixing Feeds</title>
					<img style="width : 90% ; margin : 2% ; " src="feedburner-cleanup.png" title="Cleaning Up Feeds"/>
				</slide>
				<slide>
					<title>Load Balancing</title>
					<img style="width : 90% ; margin : 2% ; " src="feedburner-load-balancing.png" title="Providing Feed Load Balancing"/>
				</slide>
				<slide>
					<title>Statistics/Analytics</title>
					<img style="width : 90% ; margin : 2% ; " src="feedburner-statistics.png" title="Providing Feed Statistics"/>
				</slide>
			</part>
		</part>
		<part id="atompub">
			<title>Atom Publishing Protocol</title>
			<slide>
				<title>Syndication Format Protocols</title>
				<ul>
					<li>LiveJournal (very simple text-based protocol)</li>
					<ul>
						<li>not very good at handling structures (re-inventing for encoding structure)</li>
					</ul>
					<li>Blogger (now at <a href="http://code.google.com/apis/blogger/overview.html">Google</a> after <a href="http://web.archive.org/web/20031008161432/http://weblog.siliconvalley.com/column/dangillmor/archives/000802.shtml">Google bought Pyra</a>)</li>
					<ul>
						<li>no support for titles or any other sort of entry metadata</li>
						<li>protocol from the early days of blogging before tagging became popular</li>
					</ul>
					<li><a href="http://www.xmlrpc.com/metaWeblogApi">MetaWeblog</a> (an attempt to improve Blogger)</li>
					<ul>
						<li>extends Blogger using a very bad design (RSS XML as XML-RPC structure encoded as XML)</li>
					</ul>
					<li><em>Atom Publishing Protocol (AtomPub)</em> is an attempt to provide a clean alternative</li>
					<ul>
						<li>use the same document structures for feeds and the protocol interacting with them</li>
						<li>use a REST approach to provide a simple and Web-compatible protocol</li>
						<li>add <link href="atom-service-documents"/> and <link href="atom-category-documents"/> for additional tasks</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>RESTified Syndication</title>
				<ul>
					<li>Atom is a format for retrieving a set of entries as a feed document</li>
					<ul>
						<li>feeds often are time-based and are refreshed periodically or whenever needed</li>
						<li>feeds can use any other strategy for deciding what to publish</li>
					</ul>
					<li>Read-only access to feeds should be complemented by full access</li>
					<ul>
						<li>full access needs the <q>CUD</q> out of the <q>CRUD</q> set of operations</li>
						<li>many Web-centric technologies try to build on the Web's REST model of interaction</li>
					</ul>
					<li>AtomPub builds on Atom and adds a REST-based protocol on top of it</li>
					<ul>
						<li><http>POST</http> for creating new entries (sending the request to the collection)</li>
						<li><http>PUT</http> for updating existing entries (overwriting the existing entry)</li>
						<li><http>DELETE</http> for deleting entries from a collection</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Collections, Members, Entries, Media</title>
				<ul>
					<li>AtomPub's top-level concept is a <em>collection</em></li>
					<ul>
						<li>collections are used for managing and organizing members</li>
						<li>Atom feed documents are the representation of collections</li>
					</ul>
					<li>Members of a collection can be <em>entry</em> and <em>media</em> resources</li>
					<ul>
						<li>entry resources represent metadata and are represented as Atom entries</li>
						<li>media resources can have any media type and are the data described by entries</li>
						<li>a <em>media link entry</em> is an entry associated with a member</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Protocol Summary</title>
					<table border="1" cellpadding="20" style="width : 90% ; margin : 2% ; ">
						<thead>
							<tr valign="top">
								<th>Resource</th>
								<th>HTTP Method</th>
								<th>Representation</th>
								<th>Description</th>
							</tr>
						</thead>
						<tbody>
							<tr valign="top">
								<td>Introspection</td>
								<td>
									<http>GET</http>
								</td>
								<td><link href="atom-service-documents">Atom Service Document</link></td>
								<td>Enumerates a set of collections and lists their URIs and other information about the collections</td>
							</tr>
							<tr valign="top">
								<td>Collection</td>
								<td>
									<http>GET</http>
								</td>
								<td>Atom Feed</td>
								<td>A list of members of the collection (this may be a subset of all entries in the collection)</td>
							</tr>
							<tr valign="top">
								<td>Collection</td>
								<td>
									<http>POST</http>
								</td>
								<td>Atom Entry</td>
								<td>Create a new entry in the collection</td>
							</tr>
							<tr valign="top">
								<td>Member</td>
								<td>
									<http>GET</http>
								</td>
								<td>Atom Entry</td>
								<td>Get the Atom Entry</td>
							</tr>
							<tr valign="top">
								<td>Member</td>
								<td>
									<http>PUT</http>
								</td>
								<td>Atom Entry</td>
								<td>Update the Atom Entry</td>
							</tr>
							<tr valign="top">
								<td>Member</td>
								<td>
									<http>DELETE</http>
								</td>
								<td>n/a</td>
								<td>Delete the Atom Entry from the collection</td>
							</tr>
						</tbody>
					</table>
			</slide>
			<slide id="atom-service-documents">
				<title>Service Documents</title>
				<blockquote>Service Documents represent server-defined groups of Collections, and are used to initialize the process of creating and editing resources.</blockquote>
				<ul>
					<li>The <q>real</q> top-level construct of AtomPub is the <em>workspace</em></li>
					<ul>
						<li>collections on a server are organized into different workspaces</li>
						<li>workspaces have no AtomPub semantics and no operations can be performed on them</li>
					</ul>
					<li>Service documents list constraints on the members of collections</li>
					<ul>
						<li><atom>accept</atom> specifies a comma-separated list of media ranges (with <code>entry</code> as special value)</li>
						<li><atom>categories</atom> defines the list of categories that can be applied to members (can be <code>fixed</code>)</li>
						<li>AtomPub servers are likely to reject operations not satisfying these constraints</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Service Document Example</title>
				<listing src="atom-service.xml"/>
			</slide>
			<slide id="atom-category-documents">
				<title>Category Documents</title>
				<ul>
					<li>Categories are important for creating and reading entries</li>
					<ul>
						<li>they may contain metadata using any classification scheme</li>
					</ul>
					<li><link href="atom-service-documents"/> contain a list of allowed categories</li>
					<li>AtomPub defines a document format for standalone category documents</li>
					<ul>
						<li>a useful interface between AtomPub systems and other systems using classification schemes</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Category Document Example</title>
				<listing src="atom-category.xml"/>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Semantic Web Light</title>
				<ul>
					<li>Syndication creates representations for universal concepts</li>
					<li>Atom adds some concepts to RSS's model</li>
					<li>Syndication revolves around the idea of interacting with items</li>
					<li>Atom-based interaction is one way of implementing REST</li>
					<li>For more semantics, Atom is only the foundation</li>
				</ul>
			</slide>
        </part>
    </presentation>
     <presentation id="mediatypes">
        <title>Media Types</title>
        <date>2009-10-08</date>
        <toc class="reading"><a href="http://www.w3.org/2001/tag/doc/mime-respect" title="Authoritative Metadata">MIME Respect</a></toc>
        <toc class="resources"><a href="http://dret.net/rfc-index/reference/RFC2046" title="Media types RFC">MIME</a>&#160;· <a href="http://www.iana.org/assignments/media-types/" title="IANA media type registry">Registry</a></toc>
        <toc class="assignment"><a href="a/3/">A3</a>&#160;assigned (due&#160;date:&#160;10/18)</toc>
        <toc class="abstract">One of the most important aspects of computer-based communications is the concept of <em>media types</em>, the question of what type of information some digital artifact represents, and how it is encoded. The most common standard for this information is the scheme introduced by <em>Multipurpose Internet Mail Extensions (MIME)</em>. Media types can be negotiated by peers communicating through HTTP. Some media types allow <em>fragment identifiers</em>, which allow references to a resource to identify a fragment of the complete resource.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide id="mime">
			<title>Multipurpose Internet Mail Extensions (MIME)</title>
			<ul>
				<li>Basic e-mail only supports ASCII text messages</li>
				<li>MIME was introduced in 1993 to standardize a more powerful message format</li>
				<ul>
					<li>multiple objects in a single message</li>
					<li>text having unlimited line length or overall length</li>
					<li>character sets other than ASCII, allowing non-English language messages</li>
					<li>binary or application specific files</li>
					<li>images, audio, video and multi-media messages</li>
				</ul>
				<li>Resource types are necessary for every automated action with resources</li>
				<ul>
					<li>Unix started with <code>/etc/mime.types</code>, a list of mappings between extensions and media types</li>
					<li>the Unix <code>file</code> command uses simple fingerprints (specified in <code>/etc/magic</code>)</li>
					<li>double-clicking in GUIs needs a file association (based on the file's type) to work</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Unix File Type Handling</title>
			<listing src="mime.types" line="376-386" title="Rosetta's /etc/mime.types"/>
			<listing src="mime-magic" line="381-392" title="Rosetta's /etc/mime-magic"/>
		</slide>
		<slide>
			<title>Windows File Type Handling</title>
			<img style="width : 90% ; margin : 2% ; " src="windows-file-types.png" title="Windows File Type Handling"/>
		</slide>
		<part>
			<title>Media Types and the Web</title>
			<slide>
				<title>Browsers and Resources</title>
				<ul>
					<li>Web browsers retrieve resources and render them</li>
					<ul>
						<li>HTTP can transfer any kind of resource (binary resources must be transfer encoded)</li>
						<li>resource types cannot (and should not) be inferred from the URI</li>
					</ul>
					<li>HTTP combines data transfer, transfer management, and metadata</li>
					<ul>
						<li>basic information about a resource (modification date)</li>
						<li>information describing the resource's type (media type) and content (language)</li>
						<li><link href="http-conneg"/> can be used to request a specific resource variant</li>
					</ul>
					<li>The resource type received may or may not be supported by the browser</li>
					<ul>
						<li><em>built-in support</em> is provided for the core Web resource types (HTML, GIF, JPEG)</li>
						<li><em>plug-in</em> support is an add-on to the browser for popular types (PDF, Flash)</li>
						<li><em>external applications</em> are standalone applications invoked by the browser</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Firefox Media Type Handling</title>
				<img style="width : 90% ; margin : 2% ; " src="firefox-media-types.png" title="Controlling Media Type Handling in Firefox"/>
			</slide>
			<slide>
				<title>Media Type Control in Browsers</title>
				<ul>
					<li>In practice, media type control is less than perfect</li>
					<ul>
						<li>getting control over media types means getting control over your computer</li>
						<li>applications are rarely written in a perfectly well-designed way</li>
					</ul>
					<li>XML is handled very poorly by Firefox</li>
					<ul>
						<li><q>rendering</q> XML is unbelievably slow (and very primitive)</li>
						<li>if the XML has errors (real or imagined by Firefox), there is no way to get to it</li>
						<li>helper applications can be registered, but this is ignored by Firefox</li>
					</ul>
					<li>Competing application suites hijack as many media types as possible</li>
					<ul>
						<li><em>QuickTime</em> and <em>Windows Media Player</em> support the same media types</li>
						<li>installing or updating these packages often changes many media type settings</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Media Types</title>
			<slide id="mime-content-types">
				<title>Content Types</title>
				<ul>
					<li>MIME splits the world of resource types into <em>Content Types</em> and <em>Subtypes</em></li>
					<ul>
						<li><em>Content types</em> are the main classification of a resource type</li>
						<li><link href="mime-subtypes"/> qualify the format and encoding used for the content</li>
					</ul>
					<li>Content types classify the world of resource types into 8 areas</li>
					<ul>
						<li><mime>audio</mime> for media types representing exclusively audio signals</li>
						<li><mime>image</mime> for any media type representing two-dimensional images</li>
						<li><mime>message</mime> for resources representing e-mail messages</li>
						<li><mime>model</mime> for complex representations of physical objects (very unpopular)</li>
						<li><mime>multipart</mime> for MIME entities containing multiple individual MIME-tagged resources</li>
						<li><mime>text</mime> for mainly textual material (e.g., HTML is considered to be text)</li>
						<li><mime>video</mime> for media types combining moving pictures with audio</li>
						<li><mime>application</mime> for any resource which cannot be classified anywhere else</li>
						<li>(<mime>example</mime> is only used for media type examples, not for real-world resources)</li>
					</ul>
				</ul>
			</slide>
			<slide id="mime-subtypes">
				<title>Subtypes</title>
				<ul>
					<li>Within each content type, many different data formats are in use</li>
					<ul>
						<li>content types only allow a broad classification</li>
						<li>subtypes allow the identification of a specific data format of a resource</li>
					</ul>
					<li>Subtypes are expected to be <link href="mime-registration">registered</link> with the <a href="http://www.iana.org/" title="Internet Assigned Numbers Authority">IANA</a></li>
					<ul>
						<li>unregistered subtypes can be used but must have an <mime>x-</mime> prefix</li>
					</ul>
					<li>Additional qualifiers can be used to be more specific</li>
					<ul>
						<li><mime>text/plain</mime> is the media type for plain text files</li>
						<li>plain text files have additional properties such as character encoding and language</li>
						<li><mime>text/plain</mime> can be further qualified to <mime>text/plain; charset=iso-8859-1</mime></li>
						<li>not all qualifications are available as media type (e.g., language is not addressed)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>What is XML?</title>
				<ul>
					<li>XML can be regarded from various points of view</li>
					<ul>
						<li>XHTML is (almost) identical to HTML and should be regarded as <mime>text</mime></li>
						<li>configuration files are data for some program and should be regarded as <mime>application</mime></li>
						<li>XML schemas can be mixes of these two extreme cases</li>
					</ul>
					<li><a href="http://dret.net/rfc-index/reference/RFC3023">RFC 3023</a> registers a set of XML-related media types</li>
					<ul>
						<li><mime>application/xml</mime> for generic XML usage (data oriented XML)</li>
						<li><mime>text/xml</mime> for textual XML usage (document oriented XML)</li>
						<li><mime>application/xml-dtd</mime> for DTDs (which are not XML documents)</li>
						<li><mime>application/xslt+xml</mime> (proposed) for XSLT stylesheets (<mime>text/css</mime> is defined by <a href="http://dret.net/rfc-index/reference/RFC2318">RFC 2318</a>)</li>
						<li><mime>image/svg+xml</mime> (proposed) for vector graphics using <link href="svg"/></li>
						<li><mime>application/xhtml+xml</mime> is defined by <a href="http://dret.net/rfc-index/reference/RFC3236">RFC 3236</a></li>
						<li>… and a number of other suggestions for future XML media types</li>
					</ul>
				</ul>
			</slide>
			<slide id="mime-registration">
				<title>Media Type Registration</title>
				<ul>
					<li>Media types need to be registered together with documentation</li>
					<ul>
						<li>this makes sense if it is assumed that registered types should be openly accessible</li>
						<li>this becomes complicated if the types are proprietary and not publicly documented</li>
					</ul>
					<li>It makes sense to register types even if they are not publicly documented</li>
					<ul>
						<li>if a Word document is sent by e-mail it should be opened by the Word application</li>
						<li>IANA registers <q><mime>vnd.</mime></q> prefixed subtypes with fewer requirements than <q>regular</q> types</li>
						<li>vendor specific types are often undocumented and may change significantly over time</li>
					</ul>
					<li>Using well-defined types makes handling resources more stable</li>
					<ul>
						<li>the IANA registry contains hundreds of types (most of them <mime>application</mime> types)</li>
						<li>when designing applications dealing with various content types, use media types as the foundation</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title><mime>application/msword</mime> Media Type</title>
				<listing src="msword.txt" line="43-63" title="application/msword Media Type Registration" href="http://www.iana.org/assignments/media-types/application/msword"/>
			</slide>
			<part>
				<title>Text Content Types</title>
				<slide id="mime-plaintext">
					<title>Plain Text</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2046">RFC 2046</a> defines plain text files as a basic media type</li>
						<ul>
							<li>any text file that does not contain structures which are intended for machine-based processing</li>
							<li>even <link href="mime-csv"/> files do not count as plain text</li>
						</ul>
						<li>Guessing of character encoding is hard and unreliable and should be avoided</li>
						<ul>
							<li>the character encoding can be specified with an additional parameter: <mime>text/plain; charset=iso-8859-1</mime></li>
							<li>if no such parameter is present, ASCII should be assumed as the character encoding</li>
						</ul>
						<li>For more specific text subtypes, <a href="http://www.iana.org/assignments/media-types/text/">various other subtypes exist</a></li>
						<ul>
							<li><mime>calendar</mime> for information about calendar entries</li>
							<li><mime>javascript</mime> for JavaScript code (should now be marked as <mime>application/javascript</mime>)</li>
							<li><mime>sgml</mime> and <mime>xml</mime> for text with additional markup</li>
						</ul>
					</ul>
				</slide>
				<slide id="mime-html">
					<title>HTML</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2854">RFC 2854</a> registers <mime>text/html</mime> for HTML documents</li>
						<ul>
							<li>like <link href="mime-plaintext"/> the character encoding can also be specified as a parameter</li>
							<li>it is not specific for some version of HTML (version information can be found in the HTML document)</li>
						</ul>
						<li><link href="html-fragment-identifiers"/> are also defined by the media type registration</li>
						<li>HTML in many cases needs additional resources to be <q>self-contained</q></li>
						<ul>
							<li>images which are referenced by <elem>img</elem> elements (maybe external image maps)</li>
							<li>other media referenced by <elem>object</elem> or <elem>applet</elem> (or the deprecated <elem>embed</elem>)</li>
							<li>stylesheets or scripts which are referenced in the document head (they may reference other files …)</li>
							<li>generating a truly self-contained HTML is a rather hard task</li>
						</ul>
						<li>MIME can be used to represent a self-contained <a href="http://dret.net/glossary/mhtml" title="MIME HTML">MHTML</a> (<a href="http://dret.net/rfc-index/reference/RFC2557">RFC 2557</a>)</li>
					</ul>
				</slide>
				<slide id="mime-csv">
					<title short="CSV">Comma-Separated Values (CSV)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC4180">RFC 4180</a> defines a textual format for <q>spreadsheet data</q></li>
						<li>CSV has been used for a long time, but some of the details were solved differently</li>
						<li>Defining a media type makes it easier for implementations to know what to expect</li>
						<ul>
							<li>the registration not only registers the type, but also defines it</li>
						</ul>
						<li>CSV is not overly complex, but some issues have to be solved</li>
						<ul>
							<li>how to separate lines (CRLF)</li>
							<li>how to end the file (CRLF is allowed but optional)</li>
							<li>are headers allowed (yes, but they are not marked as such)</li>
							<li>may different lines use different numbers of fields (no)</li>
							<li>are spaces significant (yes)</li>
							<li>are quotes significant (no, they are delimiters, so quotes as values must be escaped)</li>
							<li>how to treat fields with CRLF, commas, or quotes (enclose the value in quotes)</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Application Content Types</title>
				<slide>
					<title>JSON</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC4627">RFC 4627</a> registers JSON as a media type</li>
						<li>The definition of JSON is derived from ECMAScript's <em>object literals</em></li>
						<li>JSON is a very limited notation intended for simple structures</li>
						<ul>
							<li>it allows the four primitive types <em>strings</em>, <em>numbers</em>, <em>booleans</em>, and <em>null</em></li>
							<li>it allows the two structured types <em>objects</em> (unordered) and <em>arrays</em> (ordered)</li>
						</ul>
						<li>The value of the media type in this case is the clean integration into the Web</li>
						<ul>
							<li>information providers may choose to expose their data in JSON and XML</li>
							<li><link href="http-conneg"/> can be used to specify which representation is preferred</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Image Content Types</title>
				<slide id="mime-gif">
					<title short="GIF">Graphics Interchange Format (GIF)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2046">RFC 2046</a> registers the oldest graphics format on the Web</li>
						<li>GIF was the subject of a long patent debate</li>
						<ul>
							<li>the compression technique of GIF (<a href="http://en.wikipedia.org/wiki/Lzw" title="Lempel-Ziv-Welch">LZW</a>) had been patented by Unisys (1983)</li>
							<li>Unisys wanted to get licensing fees from all commercial online uses of GIF</li>
							<li><link href="png"/> was developed as an effort to develop a copyright-free format</li>
							<li>in 1999, Unisys changed its tactics and wanted to collect one-time fees ($5000-$7500) from all users</li>
							<li>all GIF-related LZW patents expired in 2003/2004, so GIF is freely available now</li>
						</ul>
						<li>GIF's poor features make PNG the better choice anyway</li>
						<ul>
							<li>8 bit color (requires dithering for photographs), binary transparency</li>
							<li>GIF's animation feature is the only thing that is not available in PNG … <img src="running-wolf.gif" style="height : 0.8em"/></li>
						</ul>
					</ul>
				</slide>
				<slide id="mime-jpeg">
					<title short="JPEG">Joint Photographic Experts Group (JPEG)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2046">RFC 2046</a> standardizes the second popular image format for the Web</li>
						<li>JPEG has been specifically designed for photographs</li>
						<ul>
							<li>it always is lossy (it cannot preserve the complete information from a random bitmap)</li>
							<li>it uses perception-based compression (for example, color precision is sacrificed for brightness)</li>
						</ul>
					</ul>
					<table style="width : 90% ; margin : 4% ;  font-size : smaller ; ">
						<tr>
							<td align="center">
								<img src="jpeg-average-quality.jpg" title="Average Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
							<td align="center">
								<img src="jpeg-low-quality.jpg" title="Low Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
							<td align="center">
								<img src="jpeg-lowest-quality.jpg" title="Lowest Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
						</tr>
						<tr>
							<th>Q = 50, filesize 15,138 bytes</th>
							<th>Q = 10, filesize 4,787 bytes</th>
							<th>Q = 1, filesize 1,523 bytes</th>
						</tr>
					</table>
				</slide>
				<slide id="mime-png">
					<img src="png-transparency.png" style="float : right ; "/>
					<title short="PNG">Portable Network Graphics (PNG)</title>
					<ul>
						<li>PNG is registered as <mime>image/png</mime> and is the third major image format</li>
						<ul>
							<li>PNG was intended to be a royalty- and copyright-free replacement of <link href="gif">GIF</link></li>
							<li>image formats need to be supported by browsers and thus take a long time until they are established</li>
							<li>IE6 implements PNG in a very rudimentary form, IE7 handles PNG correctly</li>
						</ul>
						<li>PNG has some advantages over GIF and JPEG</li>
						<ul>
							<li>lossless, compressed palette, grayscale, or true color images</li>
							<li>8 bit alpha channel for gradual opacity (blending into the background)</li>
						</ul>
						<li>JPEG still is the preferred format for photographic pictures</li>
						<li>GIF still is the preferred format for animated images</li>
						<ul>
							<li><a href="http://en.wikipedia.org/wiki/Mng" title="Multiple-image Network Graphics">MNG</a> and <a href="http://en.wikipedia.org/wiki/Apng" title="Animated Portable Network Graphics">APNG</a> are two available but not widely supported PNG animation formats</li>
						</ul>
					</ul>
				</slide>
			</part>
        </part>
        <part id="fragment-identifiers">
			<title>Fragment Identifiers</title>
			<slide>
				<title>Identification of Resource Fragments</title>
				<ul>
					<li>URIs identify a resource (based on a scheme and a scheme-specific part)</li>
					<ul>
						<li>URIs do not necessarily identify a specific representation of a resource</li>
						<li>any representation-specific operation needs to look at the resource type</li>
					</ul>
					<li>Fragment identifiers can be used to identify a part of a resource</li>
					<ul>
						<li><code href="http://dret.net/netdret/publications#wil02h">http://dret.net/netdret/publications<span style="color : red">#wil02h</span></code></li>
						<li>fragments are a <em>client side</em> concept (the HTTP GET requests the complete resource)</li>
						<li>if the client supports fragment handling, the identifier is interpreted</li>
					</ul>
					<li>Fragment identifiers and content negotiation have a problematic relationship</li>
					<ul>
						<li>keeping fragment identifiers stable across resource variants may be hard</li>
						<li>nothing in the fragment identifier associates it with a specific resource variant</li>
					</ul>
				</ul>
			</slide>
			<slide id="html-fragment-identifiers">
				<title>HTML Fragment Identifiers</title>
				<ul>
					<li>HTML allows to address named/identified elements in the HTML document</li>
					<ul>
						<li>the first HTML versions required named <code>&lt;a name="frag-id">incoming anchors&lt;/a></code></li>
						<li>newer HTML versions allow <code>&lt;p id="frag-id">every element to have an id&lt;/p></code></li>
						<li>browsers support both ways, but the <xml>id</xml> variant should be preferred</li>
					</ul>
					<li>Only named/identified fragments can be identified</li>
					<ul>
						<li>99.99% of all page authors do not routinely add identifications</li>
						<li>tools may be smarter and take over that task (e.g., <em>Movable Type</em> identifies all relevant elements)</li>
						<li>for most pages on the Web this means users cannot link to most elements in them</li>
					</ul>
					<li>Keeping fragment identifiers stable should be a goal of Web authors</li>
					<ul>
						<li>identify the key fragments (and maybe provide a better UI than <q>view source</q>)</li>
						<li>never change identifiers once they have been assigned</li>
					</ul>
				</ul>
			</slide>
			<slide id="xml-fragment-identifiers">
				<title>XML Fragment Identifiers</title>
				<ul>
					<li>XPointer is the language which defines XML fragment identifiers</li>
					<ul>
						<li>XPointer's history and success is even worse than XLink's</li>
						<li>the interesting part was never finished, the easier parts have been finalized</li>
					</ul>
					<li>XPointer supports HTML's approach of <em>identified elements</em></li>
					<ul>
						<li>if there is simply a name after the <q><code>#</code></q>, it is a <em>shorthand</em> and treated like in HTML</li>
						<li>… almost, actually, because IDs in XML are a bit more complicated (<xml>xml:id</xml> or schema-defined)</li>
					</ul>
					<li>XPointer allows <em>child sequences</em> to navigate the document tree</li>
					<ul>
						<li><code>…#element(/1/2)</code> or <code>…#element(intro/3/1)</code></li>
						<li>the nice aspect is that elements without an explicit identification can be identified</li>
						<li>the risky part is that any changes in the document structure may break the XPointer</li>
					</ul>
					<li>XPointer in all its glory (W3C working draft December 2002 …)</li>
					<ul>
						<li>an extension of XPath inheriting all the interesting features of XPath</li>
						<li>adds the concept of <em>range locations</em> between two XPath-identified <em>point locations</em></li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Know Your Resources</title>
				<ul>
					<li>Handling Web resources and technologies requires a common vocabulary</li>
					<li>Media types are a useful vocabulary for identifying resource types</li>
					<li>Fragment identifiers add some complexity, in particular for resource variants</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="usability" external="usability.pdf">
		<title>Usability</title>
		<date>2009-10-15</date>
		<toc class="reading"><a href="http://portal.acm.org/citation.cfm?id=286067" title='Chapter 3 (pp. 41-64) of Hugh Beyer and Karen Holtzblatt, "Contextual Design: Defining Customer-Centered Systems", Morgan Kaufmann, San Francisco, 1998'>Contextual Design (Chapter 3, pp. 41-64)</a></toc>
		<toc class="resources"><a href="http://www.useit.com/jakob/inspectbook.html">Heuristic Evaluation</a>&#160;· <a href="http://useit.com/" title="Jakob Nielsen's Website">useit.com</a></toc>
		<toc class="abstract">According to the <em>International Organization for Standardization (ISO)</em>, <em>usability</em> defines the extent to which a product can be used by specified users to achieve specified goals with effectiveness, efficiency and satisfaction in a specified context of use. We will discuss tradeoffs in the design of Web interfaces to support users' goals, and present resources to aid design decisions.</toc>
	</presentation>
    <presentation id="scripting">
        <title short="Scripting">Scripting</title>
        <date>2009-10-20</date>
		<toc class="reading"><a href="http://www.yourhtmlsource.com/javascript/dhtmlexplained.html" title="DHTML Explained">DHTML</a></toc>
		<toc class="resources"><a href="http://domscripting.com/book/sample/" title='Sample Chapter from "DOM Scripting": Best Practices'>Best Practices</a>&#160;· <a href="http://www.webteacher.com/javascript/" title="JavaScript Tutorial">Tutorial</a>&#160;· <a href="http://en.wikipedia.org/wiki/Dynamic_HTML" title="Wikipedia: Dynamic HTML (DHTML)">Wikipedia</a></toc>
        <toc class="assignment"><a href="a/4/">A4</a>&#160;assigned (due&#160;date:&#160;11/1)</toc>
        <toc class="abstract">Scripting is used on the majority of today's modern Web sites. Scripting can be used to improve the usability and accessibility of a Web site (for example for <em>validating form data on the client side</em>), it can vastly improve the user experience with new interface design (the <em>smooth scrolling of Google Maps</em> vs. older <q>click to scroll</q> map services), or it can be used to implement behavior that would be impossible without scripting (for example the <em>online applications of Google Docs</em>). <em>Asynchronous JavaScript and XML (Ajax)</em> takes <em>Dynamic HTML (DHTML)</em> to the next level by allowing server access from within scripting code. This is accomplished by using a standardized API for client/server communications, the <code>XMLHttpRequest</code> object. This object allows using HTTP connections from within scripting code, and thereby allows scripting code to dynamically reload data from a server in response to user interactions.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
		<slide>
			<title>Scripting on the Web</title>
			<ul>
				<li>Web pages were static HTML</li>
				<ul>
					<li><link href="forms"/> were the only interactive part of Web pages</li>
					<li>interaction was only possible by clicking links and visiting new pages</li>
					<li>CSS introduced limited dynamic behavior (such as <code>mouseOver</code> events)</li>
				</ul>
				<li>Netscape invented the <link href="dom"/> and <em>LiveScript</em></li>
				<ul>
					<li><a href="http://en.wikipedia.org/wiki/Java_(programming_language)">Java</a> was new and hip, so the language was renamed to <link href="javascript"/></li>
					<li>pages with scripting (a.k.a. <em>Dynamic HTML</em> or <em>DHTML</em>) allowed richer user interfaces</li>
					<li>other browsers invented their own <q>versions</q> of DOM/JavaScript</li>
				</ul>
				<li>Scripting was and is often used to implement <q>missing functionality</q></li>
				<ul>
					<li>good scripting supports graceful degradation (leaving the page functional)</li>
					<li>bad scripting compromises accessibility when the scripting code does not work</li>
				</ul>
				<li>Any non-trivial scripting has to deal with browser differences</li>
				<ul>
					<li><link href="js-frameworks"/> help by providing a foundation to build on</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>The Joys of Web Design</title>
			<img style="height : 75% ; margin : 2% ; " src="web-design.png" title="Time Breakdown of Modern Web Design" href="http://www.poisonedminds.com/"/>
		</slide>
        <slide id="dhtml">
			<title>Basic Scripting (DHTML)</title>
			<listing src="nicetitle.html" title="Improving the mouseOvers for HTML title attributes (HTML)"/>
        </slide>
        <slide>
			<title>Basic Scripting (JavaScript)</title>
			<listing src="nicetitle.js" line="18-32" title="Improving the mouseOvers for HTML title attributes (JavaScript)"/>
			<listing src="nicetitle.js" line="78-84" title="Improving the mouseOvers for HTML title attributes (JavaScript)"/>
        </slide>
        <slide>
			<title>Basic Scripting (CSS)</title>
			<listing src="nicetitle.css" title="Improving the mouseOvers for HTML title attributes (CSS)"/>
        </slide>
        <part id="javascript">
			<title>JavaScript</title>
			<slide>
				<title>Browsers are Platforms</title>
				<ul>
					<li><em>Runtime environments</em> are critical for running applications</li>
					<ul>
						<li>popular computer environments are Windows, MacOS, Linux, and Java</li>
						<li>popular mobile environments are iPhone, Android, Blackberry, S60, and JavaME/JavaFX</li>
						<li>popular Web-based environments are JavaScript, Flash, and Java applets</li>
						<li>popular Web-oriented environments are Silverlight and AIR</li>
					</ul>
					<li>JavaScript is a scripting language supported by most browsers</li>
					<ul>
						<li>access to the current document's <link href="dom">DOM</link> is the most important part of DHTML</li>
						<li>JavaScript has code, functions, and interacts with the user through the browser</li>
					</ul>
				</ul>
				<pre>&lt;p>The current time is &lt;script type="text/javascript">
var currentTime = new Date() ;
document.write(currentTime.getHours() + ":" + currentTime.getMinutes()) ;
&lt;/script>.&lt;/p>
</pre>
				<pre>&lt;p>The current time is <script type="text/javascript">
var currentTime = new Date() ;
document.write(currentTime.getHours() + ":" + currentTime.getMinutes()) ;
</script>.&lt;/p>
</pre>
			</slide>
			<slide>
				<title>Compiled vs. Interpreted Languages</title>
				<ul>
					<li>Programming languages can be <em>compiled</em> or <em>interpreted</em></li>
					<li><em>Compilers</em> can do more than just translate</li>
					<ul>
						<li>compiled languages are translated before they can be executed</li>
						<li>check the code for errors that can be determined statically</li>
						<li>augment the code for performance or analysis purposes</li>
						<li>compile/optimize the code for different runtime platforms</li>
					</ul>
					<li><em>Interpreters</em> provide a less heavyweight environment</li>
					<ul>
						<li>interpreted languages can be executed directly</li>
						<li>any change in the code can be tested immediately</li>
						<li>less tightly coupled software bundles than compiled packages</li>
					</ul>
					<li><em>Scripting</em> has become much more popular in the past years</li>
					<ul>
						<li>server-side languages such as PHP, Ruby/Rails, and Python</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>JavaScript and Browsers</title>
				<ul>
					<li>Scripting used to be <q>too slow for serious applications</q></li>
					<ul>
						<li>processors have become much faster</li>
						<li>language interpreters have become much smarter</li>
					</ul>
					<li>Implementations now deliver high-performance scripting</li>
					<ul>
						<li>Safari uses <a href="http://trac.webkit.org/wiki/SquirrelFish">the SquirrelFish JavaScript engine</a></li>
						<li>Chrome uses <a href="http://code.google.com/p/v8/">the V8 JavaScript engine</a></li>
					</ul>
					<li>Modern implementations allow sophisticated applications</li>
					<ul>
						<li><a href="http://docs.google.com/">Google Docs</a> is a set of browser-based <q>desktop applications</q></li>
						<li><a href="http://ejohn.org/blog/processingjs/">Processing.js</a> is a JavaScript version of a popular visualization environment</li>
					</ul>
					<li>Some platforms switch to interpreted languages</li>
					<ul>
						<li>Palm's <em>webOS</em> uses JavaScript as the native language</li>
						<li>Google's <em>ChromeOS</em> uses the browser's JavaScript as the native language</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part id="dom">
			<title short="DOM">Document Object Model (DOM)</title>
			<slide>
				<title>From HTML to DOM</title>
				<ul>
					<li><link href="html">HTML</link> is a representation for hypermedia documents</li>
					<ul>
						<li>a representation is required to store and transmit the document</li>
						<li>HTML uses <link href="html-markup">markup</link> for representing the document structure</li>
					</ul>
					<li>Browsers must render HTML documents (i.e., apply CSS and execute JavaScript)</li>
					<ol>
						<li><http>GET</http> HTML from server and receive as <mime>text/html</mime> document</li>
						<li>parse document and deal with any errors by <q>fixing them</q></li>
						<li>interpret document as if it had been error-free</li>
						<li><http>GET</http> all additional resources (CSS, images, JavaScript, …)</li>
						<li>build internal model (DOM) based on error-free interpretation</li>
						<li>apply CSS rules to determine styling of document (e.g., margins and font sizes)</li>
						<li>render into visual structure</li>
						<li>start executing JavaScript code</li>
						<li>listen for events (keyboard, mouse, timer) and execute code</li>
						<li>discard everything and start over when user navigates to a different page</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Browser Handling of HTML</title>
				<img style="width : 90% ; margin : 2% ; " src="html-parser.png" title="Browser Handling of HTML"/>
			</slide>
			<slide id="dom-document">
				<title>Document</title>
				<ul>
					<li>The document (HTML) is the <em>interface language for Web applications</em></li>
					<li>Most programming environments have visual interface models</li>
					<ul>
						<li>almost everything has moved to <em>window-oriented interfaces</em></li>
						<li>Windows, MacOS, and Linux provide similar visual metaphors</li>
					</ul>
					<li>Web applications must use HTML as their model for the interface</li>
					<ul>
						<li><link href="forms"/> are a simple way to build an interface</li>
						<li>forms can be extended with client-side helpers (validation, repeating entries)</li>
					</ul>
				</ul>
			</slide>
			<slide id="dom-object">
				<title>Object</title>
				<ul>
					<li>Documents are static, programming is dynamic</li>
					<ul>
						<li>documents and code must be connected</li>
						<li><em>objects</em> are a common abstraction in programming languages</li>
					</ul>
					<li>Objects usually have a <em>type</em> and <em>methods</em></li>
					<ul>
						<li><em>types</em> for HTML-based objects are based on HTML's elements</li>
						<li><em>methods</em> define the allowed ways to interact with objects</li>
						<li>interactions can be read-only or they can change the document structure</li>
					</ul>
				</ul>
				<listing src="nicetitle.js" line="22-32" title="Improving the mouseovers for HTML title attributes (JavaScript)"/>
			</slide>
			<slide id="dom-model">
				<title>Model</title>
				<ul>
					<li>Models are idealized/abstract views of something</li>
					<li>Model interfaces allow to expose that idealized/abstract view</li>
					<li>DOM introduced a <em>common way of how browsers deal with HTML</em></li>
					<ul>
						<li>without a DOM, there can be no interoperable scripting</li>
					</ul>
					<li>Abstractions are also limitations</li>
					<ul>
						<li>some vendors add/support <q>extensions</q> to the basic DOM model</li>
						<li>any code based on these extensions is not interoperable</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Markup to Trees</title>
				<ul>
					<li>Browsers build a DOM tree from the (fixed) markup</li>
					<ul>
						<li>scripting code never works on the markup text, it works on the tree</li>
					</ul>
					<li>DOM trees are an abstraction of the markup</li>
					<ul>
						<li>trees provide convenient navigation facilities</li>
						<li>abstractions provide an insulation against irrelevant markup details</li>
					</ul>
					<li>DOM is available in a large number of programming languages</li>
					<ul>
						<li><link href="javascript"/> is the language supported in Web browsers</li>
						<li>DOM is also available for Java, C, C++, Python, Perl, C#, Ruby, …</li>
					</ul>
				</ul>
			</slide>
			<slide id="domhistory">
				<title>DOM History</title>
				<ul>
					<li>DOM0 was invented by Netscape (backing the LiveScript/JavaScript)</li>
					<li>DOM1 was the first DOM version produced by the W3C</li>
					<li>DOM2 is the currently available stable version of DOM</li>
					<ul>
						<li>major changes to be compliant with XML and the XML Infoset</li>
					</ul>
					<li>DOM3 is highly modularized and still under development</li>
					<ul>
						<li>more XML technologies such as the ability to use XPath</li>
					</ul>
				</ul>
			</slide>
			<slide id="dom2">
				<title>DOM2 Map</title>
				<img style="height : 70% ; margin : 2% ; " src="dom2-map.png"/>
			</slide>
			<slide id="dom3">
				<title>DOM3 Map</title>
				<img style="height : 70% ; margin : 2% ; " src="dom3-map.png"/>
			</slide>
        </part>
		<part id="ajax">
			<title short="Ajax">Asynchronous JavaScript and XML (Ajax)</title>
			<slide>
				<title>Ajax = DHTML + HTTP</title>
				<ul>
					<li><link href="dhtml"/> uses JavaScript <q>locally</q></li>
					<ul>
						<li>the scripting code reacts to user events and accesses the DOM structure</li>
						<li>changes are either hardcoded or derived from user events</li>
					</ul>
					<li><link href="ajax">Ajax</link> adds an <link href="http-request">HTTP request</link> method to JavaScript</li>
					<ul>
						<li>scripting code can now request additional data from an HTTP server</li>
						<li>changes can thus be made based on any data received from the server</li>
					</ul>
					<li>Ajax dramatically reduces the number of page reloads</li>
					<ul>
						<li>any change of the page can be done without a complete reload</li>
						<li>based on user interactions, parts of the page can be reloaded</li>
					</ul>
					<li>Ajax has the same interoperability problems as DHTML</li>
				</ul>
			</slide>
			<slide>
				<title>Ajax and DHTML</title>
				<img style="width : 90% ; margin : 2% ; " src="ajax.png" title="Comparison of Ajax and DHTML"/>				
			</slide>
			<slide>
				<title>JavaScript and XML</title>
				<ul>
					<li>The <code>XMLHttpRequest</code> API has been built for requesting XML via HTTP</li>
					<ul>
						<li>this is useful because XML is the most popular data format</li>
						<li>all requested data has to be processed by using XML access methods in JavaScript</li>
					</ul>
					<li>JavaScript does not have XML as its internal data model</li>
					<ul>
						<li>the XML received via <code>XMLHttpRequest</code> has to be parsed into a DOM tree</li>
						<li>DOM access in JavaScript is inconvenient for complex operations</li>
						<li>alternatively, the XML can be mapped to JavaScript objects (also requires parsing)</li>
					</ul>
					<li><em>JavaScript Object Notation (JSON)</em> encodes data as JavaScript objects</li>
					<ul>
						<li>more efficient for the consumer if the consumer is written in JavaScript</li>
						<li>this turns the generally usable XML service into a JavaScript-oriented service</li>
						<li>for large-scale applications, it might make sense to provide XML and JSON</li>
						<li>using <link href="http-conneg"/>, this can be negotiated on the HTTP protocol level</li>
					</ul>
				</ul>
			</slide>
			<slide id="json">
				<title>JSON Example</title>
				<listing src="menu.xml"/>
				<listing src="menu.json"/>
			</slide>
			<slide>
				<title>JSON via Content Negotiation</title>
				<ul>
					<li>XML or JSON are just different representations</li>
					<ul>
						<li>they represent the same underlying resource (as identified by the URI)</li>
					</ul>
					<li><link href="http-conneg"/> allows to specify preferences</li>
					<ul>
						<li>clients specify preferences and servers respond with the best match</li>
					</ul>
					<li> HTTP <http>Accept</http> specifies the accepted <link href="mediatypes"/></li>
					<ul>
						<li>resources may be available in HTML, XML, or JSON</li>
						<li>depending on the HTTP request, the server responds with the best representation</li>
						<li>reduces processing time on the client and can be cached</li>
					</ul>
					<li>Really smart Ajax frameworks could even hide the content negotiation</li>
					<ul>
						<li>request JSON or XML, with XML being the lower priority</li>
						<li>if XML is sent by the server, it has to be parsed into a JavaScript object</li>
						<li>the end result is always a JavaScript object for the framework user</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part id="js-frameworks">
			<title>JavaScript Frameworks</title>
			<slide>
				<title>Abstraction and Reality</title>
				<ul>
					<li>Browsers are not entirely standards-compliant</li>
					<ul>
						<li><a href="http://www.acidtests.org/">Acid Tests</a> are a way to test browser compliance</li>
						<li>compliance depends on what you test for (versions of the standards)</li>
					</ul>
					<li>Running <a href="http://acid3.acidtests.org/">Acid3</a> for current browsers is disappointing</li>
					<ul>
						<li>Chrome and Safari are equal (because they both use <a href="http://webkit.org/">WebKit</a>)</li>
						<li>Firefox and Opera are not that bad (but not perfect)</li>
						<li>IE8 is a disaster</li>
					</ul>
					<li>In some cases, implementations have to make guesses</li>
					<ul>
						<li>complex combinations of HTML, CSS, and JavaScript interactions</li>
					</ul>
					<li>JavaScript frameworks have two major functions</li>
					<ol>
						<li>hiding the fact that browsers need a lot of special case handling</li>
						<li>providing support for common Web design patterns</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Web Design Patterns</title>
				<ul>
					<li>Many Web pages use similar ideas/visualizations</li>
					<li>Factoring them into <em>design patterns</em> enables tool support</li>
					<li>Providing access to a tree-structured set of resources</li>
					<ul>
						<li><a href="http://developer.yahoo.com/yui/examples/treeview/folder_style.html">folder views</a> are a common design pattern</li>
					</ul>
					<li>Displaying image captions based on mouse events</li>
					<ul>
						<li><a href="http://homegel.co.za/imagecaption/">dynamic image captions</a> help combining images and their captions</li>
					</ul>
					<li>Tabs are well-known from desktop applications and popular in Web design</li>
					<ul>
						<li><a href="http://extjs.com/deploy/dev/examples/tabs/tabs.html">Ajax Tabs</a> can even load content dynamically</li>
					</ul>
					<li>Image-heavy Web sites might need image viewing support</li>
					<ul>
						<li><a href="http://demos.dojotoolkit.org/demos/cropper/">image zooming</a> can make it more convenient to zoom into images</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Popular Frameworks</title>
				<ul>
					<li>Different needs produce different frameworks</li>
					<ul>
						<li><a href="http://www.dojotoolkit.org/">Dojo</a></li>
						<li><a href="http://extjs.com/">Ext JS</a></li>
						<li><a href="http://jquery.com/">jQuery</a></li>
						<li><a href="http://developer.yahoo.com/yui/">Yahoo! YUI Library</a></li>
					</ul>
					<li>There is no such thing as the <q>best JavaScript framework</q></li>
					<ul>
						<li>for any given project, decide on the support you need</li>
						<li>evaluate frameworks for the support they provide</li>
						<li>evaluate for <em>functional requirements</em> (<q>is there a collapse/expand folder view?</q>)</li>
						<li>evaluate for <em>non-functional requirements</em> (<q>is the framework openly licensed?</q>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Important Framework Questions</title>
				<ul>
					<li>How big is it?</li>
					<li>How is it licensed?</li>
					<li>How is it maintained?</li>
					<li>How well does it support graceful degradation?</li>
					<li>How well does it mix with other JavaScript code?</li>
					<li>For professional Web development, don't overuse effects</li>
				</ul>
			</slide>
        </part>
        <slide>
			<title>Conclusions</title>
			<ul>
				<li>Scripting has become an essential part of Web-based applications</li>
				<li>DHTML is local scripting, Ajax is scripting + server access</li>
				<ul>
					<li>Ajax implements a lightweight client/server model</li>
					<li>Ajax can range from helper functions to complete local applications</li>
					<li>Creating better interfaces for Ajax applications is a research issue</li>
				</ul>
				<li>Deciding between client-side and server-side is hard</li>
				<li>JavaScript frameworks help developing script-based applications</li>
				<li>Graceful degradation becomes more important on the mobile Web</li>
			</ul>
        </slide>
    </presentation>
    <presentation id="content-context">
        <title>Content vs. Context</title>
        <date>2009-10-22</date>
        <toc class="assignment"><a href="a/5/">A5</a>&#160;assigned (due&#160;date:&#160;11/8)</toc>
        <toc class="abstract">The Web often is regarded as a <em>content</em> delivery platform (Web 1.0) or as an application development platform (Web 2.0). However, as another part of the Web 2.0 model, <em>context</em> also has become much more important, because (a) users can now more easily contextualize content by creating their own content, and (b) mechanisms such as <em>social networking</em> provide additional context that is bound to a user's identity and the social networks in which this user is engaged. In this lecture, we discuss the move from pure content to a more contextualized view of the content on the Web, and we discuss possible developments and their technical and non-technical implications.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="content-delivery">
			<title>Content Delivery</title>
			<slide>
				<title>Putting Content into Context</title>
				<ul>
					<li>Content are decontextualized Web pages</li>
					<li>Blogging mediates content</li>
					<ul>
						<li>blogs link to content and put it into context</li>
						<li>the context of the blog is unknown (invisible readership)</li>
					</ul>
					<li>Twitter mediates content and context</li>
					<ul>
						<li>tweets link to content and put it into context</li>
						<li>twitter's follower concept makes readership explicit</li>
						<li>retweeting reinforces the validity of context</li>
					</ul>
				</ul>
			</slide>
			<slide id="content-web10">
				<title>Content a la Web 1.0</title>
				<img style="height : 70% ; margin : 2% ; " src="content-context-web10.png"/>
			</slide>
			<slide id="content-blog">
				<title>Content and Blogs</title>
				<img style="height : 70% ; margin : 2% ; " src="content-context-blogs.png"/>
			</slide>
			<slide id="content-twitter">
				<title>Content and Twitter</title>
				<img style="height : 70% ; margin : 2% ; " src="content-context-twitter.png"/>
			</slide>
			<slide id="content-sidewiki">
				<title>Content and Annotations</title>
				<img style="height : 70% ; margin : 2% ; " src="content-context-sidewiki.png"/>
			</slide>
		</part>
		<part id="context-sources">
			<title>Context Types</title>
			<slide id="semantics-context">
				<title>Semantics as Context</title>				
				<ul>
					<li>Knowing more about content helps finding it</li>
					<li>Content can be more explicit about its structure</li>
					<li>Content can be embedded into contextual meaning</li>
					<ul>
						<li>ontologies help describing and relating concepts</li>
					</ul>
				</ul>
			</slide>
			<slide id="location-context">
				<title>Location as Context</title>
				<ul>
					<li>Location is a central property of real-world objects</li>
					<li>Knowing location can be critical to utility</li>
					<li>Location on the Web still is not exposed properly</li>
					<ul>
						<li>many applications use location internally</li>
						<li>using location across applications/services is still unexplored</li>
					</ul>
				</ul>
			</slide>
			<slide id="social-context">
				<title>Social Context</title>				
				<ul>
					<li>Social groups can be critical for information access</li>
					<li>For the longest time, social groups were essential</li>
					<ul>
						<li>information is only available as made available by society/culture</li>
						<li>Web-based information access removes this filter</li>
					</ul>
					<li>Social context is important in two distinct ways</li>
					<ol>
						<li>finding content made available through the social/cultural filter</li>
						<li>contextualizing content by using/accessing/annotating it</li>
					</ol>
				</ul>
			</slide>
			<slide id="time-context">
				<title>Time as Context</title>				
				<ul>
					<li>Content often has an implicit aspect of time</li>
					<li>Finding something should depend on (at least) two time dimensions</li>
					<ul>
						<li>the time when the content was created</li>
						<li>the time when the content was searched/accessed</li>
						<li>optionally, updates and annotations can be used as well</li>
					</ul>
				</ul>
			</slide>
		</part>
		<slide>
			<title>Conclusions</title>
			<ul>
				<li>Content is more valuable with context</li>
				<li>The Web becomes increasingly context-aware</li>
				<li>More ubiquity means more opportunities for contextualization</li>
				<li>Context often is partly implicit and hard(er) to control</li>
				<li>Context technologies develop much faster than content technologies</li>
			</ul>
		</slide>
    </presentation>
    <presentation id="pictures">
        <title short="Pictures">Picture Formats</title>
        <date>2009-10-27</date>
        <toc class="resources"><a href="http://www.w3.org/Graphics/GIF/spec-gif89a.txt" title="GIF Spec">GIF</a>&#160;· <a href="http://www.w3.org/Graphics/JPEG/" title="JFIF Spec">JPEG</a>&#160;· <a href="http://www.w3.org/TR/PNG/" title="W3C PNG Spec">PNG</a></toc>
        <toc class="abstract">Pictures are the only multimedia content on the Web that is widely supported by standardized formats. The most important picture formats are the <em>Graphics Interchange Format</em>, the <em>Joint Photographic Experts Group (JPEG)</em> format, and the <em>Portable Network Graphics (PNG)</em> format. These picture formats target different application areas and depending on the picture material, choosing one format over the other can make a big difference. Audio and video in many cases are not handled by the browser itself, but are included in this overview of multimedia on the Web.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide id="picture-formats">
			<title>Images vs. Graphics</title>
			<img src="vector-vs-bitmap.png" style="float : right ; margin : 0 1em 2em 2em ; " href="http://help.adobe.com/en_US/Director/11.0/help.html?content=06_bitmaps_01.html" title="Vector vs. Bitmap"/>
			<ul>
				<li>Pictures can be encoded in a <a href="http://en.wikipedia.org/wiki/Comparison_of_graphics_file_formats">wide variety of ways</a></li>
				<li><em>Images</em> are bitmaps of pixels that represent a picture</li>
				<ul>
					<li>it takes some kind of <em>scanning process</em> to produce images</li>
					<li>images have a certain <em>resolution</em> based on the quality of the scanning process</li>
					<li>scanning can take place in a scanner, in a fax machine, or in a camera's CCD</li>
				</ul>
				<li><em>Graphics</em> are composed out of graphic primitives</li>
				<ul>
					<li>graphics can be searchable, stylable, and scalable</li>
					<li>the graphics format can have very different capabilities (2D vs. 3D)</li>
				</ul>
				<li>Graphics preserve model-level information</li>
				<ul>
					<li>this only makes sense if there <em>is</em> a model</li>
					<li>rendering and styling can be an expensive process (e.g., video games)</li>
					<li>images can be a snapshot of some specific <q>view</q> of graphics</li>
				</ul>
				<li>Today's Web supports images, but not graphics</li>
			</ul>
		</slide>
		<part id="image-formats">
			<title>Image Formats</title>
			<part id="gif">
				<title short="GIF">Graphics Interchange Format (GIF)</title>
				<slide>
					<title short="GIF">Graphics Interchange Format (GIF)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2046">RFC 2046</a> registers the oldest graphics format on the Web</li>
						<li>GIF was subject of a long patent debate</li>
						<ul>
							<li>the compression technique of GIF (<a href="http://en.wikipedia.org/wiki/Lzw" title="Lempel-Ziv-Welch">LZW</a>) had been patented by Unisys (1983)</li>
							<li>Unisys wanted to get licensing fees from all commercial online uses of GIF</li>
							<li><link href="png"/> was created in an effort to provide a patent-free format</li>
							<li>in 1999, Unisys changed its tactics and wanted to collect one-time fees ($5000-$7500) from all users</li>
							<li>all GIF-related LZW patents expired in 2003/2004, so GIF is freely available now</li>
						</ul>
						<li>GIF's poor features make PNG the better choice anyway</li>
						<ul>
							<li>8 bit color (requires dithering for photographs), binary transparency</li>
							<li>GIF's animation feature is the only thing that is not available in PNG … <img src="running-wolf.gif" style="height : 0.8em"/></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="jpeg-group">
				<title short="JPEG">Joint Photographic Experts Group (JPEG)</title>
				<slide id="jpeg">
					<title short="JPEG">Joint Photographic Experts Group (JPEG)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC2046">RFC 2046</a> standardizes the second popular image format for the Web</li>
						<ul>
							<li><a href="http://dret.net/biblio/reference/iso10918">ISO 10918</a> is the standard for the actual image format</li>
						</ul>
						<li>JPEG has been specifically designed for photographs</li>
						<ul>
							<li>it always is lossy (it cannot preserve the complete information from a random bitmap)</li>
							<li>it uses perception-based compression (for example, color precision is sacrificed for brightness)</li>
						</ul>
					</ul>
					<table style="width : 90% ; margin : 4% ;  font-size : smaller ; ">
						<tr>
							<td align="center">
								<img src="jpeg-average-quality.jpg" title="Average Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
							<td align="center">
								<img src="jpeg-low-quality.jpg" title="Low Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
							<td align="center">
								<img src="jpeg-lowest-quality.jpg" title="Lowest Quality JPEG" href="http://en.wikipedia.org/wiki/JPEG#Photographs"/>
							</td>
						</tr>
						<tr>
							<th>Q = 50, filesize 15,138 bytes</th>
							<th>Q = 10, filesize 4,787 bytes</th>
							<th>Q = 1, filesize 1,523 bytes</th>
						</tr>
					</table>
				</slide>
				<slide id="jpeg2000">
					<title>JPEG 2000</title>
					<ul>
						<li>JPEG has some problems (for example, it is never lossless)</li>
						<ul>
							<li>compression technology has advanced since JPEG (<a href="http://www.acm.org/crossroads/xrds6-3/sahaimgcoding.html">DCT → Wavelets</a>)</li>
						</ul>
						<li><em>JPEG 2000</em> is a completely new standard</li>
						<ul>
							<li>it uses wavelets instead of DCT as the compression algorithm</li>
							<li>it includes support for lossless encoding (JPEG is always lossy)</li>
							<li>it even comes with its own transmission protocol (<a href="http://en.wikipedia.org/wiki/JPIP" title="JPEG 2000 Interactive Protocol">JPIP</a>)</li>
							<li><mime>image/jp2</mime> (<a href="http://dret.net/glossary/jp2">JP2</a>) and <mime>image/jpx</mime> (<a href="http://dret.net/glossary/jpx">JPX</a>) are the two JPEG 2000 MIME types</li>
						</ul>
						<li>Support for JPEG 2000 is good but not universal</li>
					</ul>
				</slide>
			</part>
			<part id="png">
				<title short="PNG">Portable Network Graphics (PNG)</title>
				<slide>
					<img src="png-transparency.png" style="float : right ; "/>
					<title short="PNG">Portable Network Graphics (PNG)</title>
					<ul>
						<li>PNG is registered as <mime>image/png</mime> and is the third major image format</li>
						<ul>
							<li>PNG was intended to be a royalty- and copyright-free replacement of <link href="gif">GIF</link></li>
							<li>image formats need to be supported by browsers and thus take a long time until they are established</li>
							<li>IE6 implements PNG in a very rudimentary form, IE7 handles PNG correctly</li>
						</ul>
						<li>PNG has some advantages over GIF and JPEG</li>
						<ul>
							<li>lossless, compressed palette, grayscale, or true color images</li>
							<li>8 bit alpha channel for gradual opacity (blending into the background)</li>
						</ul>
						<li>JPEG still is the preferred format for photographic pictures</li>
						<li>GIF still is the preferred format for animated images</li>
						<ul>
							<li><a href="http://en.wikipedia.org/wiki/Mng" title="Multiple-image Network Graphics">MNG</a> and <a href="http://en.wikipedia.org/wiki/APNG" title="Animated Portable Network Graphics">APNG</a> are two available but not widely supported PNG animation formats</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Alpha Channel Effects</title>
					<table width="90%">
						<tr>
							<td style="background : #000000"><img src="png-transparency.png"/></td>
							<td style="background : #404040"><img src="png-transparency.png"/></td>
							<td style="background : #606060"><img src="png-transparency.png"/></td>
						</tr>
						<tr>
							<td style="background : #808080"><img src="png-transparency.png"/></td>
							<td style="background : #A0A0A0"><img src="png-transparency.png"/></td>
							<td style="background : #C0C0C0"><img src="png-transparency.png"/></td>
						</tr>
					</table>
				</slide>
			</part>
			<part>
				<title>Other Image Formats</title>
				<slide id="tiff">
					<title short="TIFF">Tagged Image File Format (TIFF)</title>
					<ul>
						<li>Standard file format for scanned images</li>
						<ul>
							<li>none of the limitations of <link href="gif"/></li>
							<li>ability to represent any kind of bitmapped image information</li>
							<li>compression is supported but always lossless (not as effective as <link href="jpeg">JPEG</link>)</li>
						</ul>
						<li>Popular for scanned images and similar applications</li>
						<ul>
							<li>native support in browsers is the exception (only Safari)</li>
						</ul>
					</ul>
				</slide>
				<slide id="ico">
					<title>Icons</title>
					<ul>
						<li>Original <em>favicon</em> (a.k.a. <em>page icon</em> or <em>urlicon</em>) format</li>
						<ul>
							<li>the image format used by Windows for its icons (which is identical to the cursor format)</li>
						</ul>
						<li>Important on the Web only for Web site icons</li>
						<ul>
							<li>most browsers will display the icons in the address bar</li>
							<li>most browsers will remember the icon for bookmarks and shortcuts</li>
						</ul>
						<li>Icons appear as meta information of a Web page</li>
						<ul>
							<li>standards are not really tight and based on best practices and browsers</li>
						</ul>
						<pre><![CDATA[<link rel="icon" href="/favicon.ico" type="image/x-icon" />
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />]]></pre>
					</ul>
				</slide>
			</part>
		</part>
		<part id="graphics-formats">
			<title>Graphics Formats</title>
			<slide>
				<title>Image Concepts</title>
				<ul>
					<li>Pictures of the real world are always lossy</li>
					<ul>
						<li>there is no (accessible) underlying <q>model</q> of the real world</li>
						<li>the real world is always pictured as well as technically possible</li>
					</ul>
					<li>Graphics start from some model and turn that into an image</li>
					<ul>
						<li>an <em>image</em> of a circle will always appear jagged on close inspection</li>
						<li>a graphical picture of a circle is always perfectly round</li>
					</ul>
					<li>Graphics can be customized based on presentation needs</li>
					<ul>
						<li>line and text color can be coordinated with the rest of Web page design</li>
						<li>any information can be used as input to the graphics → image conversion</li>
					</ul>
					<li>Graphics can be searched for concepts and contents</li>
				</ul>
			</slide>
			<part id="svg">
				<title>Scalable Vector Graphics (SVG)</title>
				<slide>
					<title>Graphics for the Web</title>
					<ul>
						<li>Identified as <q>important media type</q> a long time ago</li>
						<ul>
							<li>W3C has worked on SVG for a long time</li>
							<li>complex format with little support and little success on the Web</li>
						</ul>
						<li>Embedding SVG in HTML <a href="http://www.carto.net/papers/svg/samples/svg_html.shtml">is more art than science</a></li>
						<ul>
							<li>using SVG requires browser-specific HTML code</li>
							<li>backwards compatibility must support bitmapped version of the SVG pictures</li>
						</ul>
						<li>Support for SVG is provided by many newer tools</li>
						<ul>
							<li>the SVG code produced is not always a very good version of SVG</li>
							<li>SVG has not yet been adopted as something that people really care about</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Image vs. Graphics</title>
					<table width="90%">
						<tr>
							<td align="center" valign="top"><img width="290" height="420" src="shapes.png"/></td>
							<td align="center" valign="top"><embed width="290" height="420" src="src/shapes.svg" type="image/svg+xml"/></td>
						</tr>
					</table>
					<p>Source: <a href="http://www.carto.net/papers/svg/samples/shapes.shtml">carto.net</a></p>
				</slide>
				<slide>
					<title>SVG is XML</title>
					<listing src="shapes.svg"/>
				</slide>
			</part>
		</part>
		<part id="video+audio">
			<title>Video and Audio</title>
			<slide id="streaming">
				<title>Download vs. Streaming</title>
				<ul>
					<li>Web resources usually are downloaded</li>
					<ul>
						<li>browsers may choose to implement incremental rendering (e.g., HTML or images)</li>
						<li>the resource is completely downloaded and stored</li>
					</ul>
					<li><em href="http://en.wikipedia.org/wiki/Streaming_media">Streaming</em> means that there is no complete download</li>
					<ul>
						<li>TV and phone calls are classical examples of streaming</li>
						<li>any navigable media type can use streaming (<a href="http://www.scribd.com/tools/ipaper">iPaper</a> is <q>streamed PDF</q>)</li>
						<li>some data sources cannot be downloaded (e.g., a security camera)</li>
					</ul>
					<li>Streaming often is also used because of security issues</li>
					<ul>
						<li>downloads make it easy to get content and redistribute it</li>
						<li>streaming makes redistribution much harder (content must be <a href="http://en.wikipedia.org/wiki/Destreaming">destreamed</a>)</li>
						<li>the data formats for streaming are often undisclosed</li>
					</ul>
				</ul>
			</slide>
			<slide id="ipaper">
				<title>Streamed Paper</title>
				<object codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0" id="doc_718652791171161" name="doc_718652791171161" classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000" align="middle" height="600" width="800" style="margin : 1em ; "><param name="movie" value="http://d.scribd.com/ScribdViewer.swf?document_id=13057812&amp;access_key=key-2msp2ghgo1uheil5pzpu&amp;page=1&amp;version=1&amp;viewMode=slideshow"/><param name="quality" value="high"/><param name="play" value="true"/><param name="loop" value="true"/><param name="scale" value="showall"/><param name="wmode" value="opaque"/><param name="devicefont" value="false"/><param name="bgcolor" value="#ffffff"/><param name="menu" value="true"/><param name="allowFullScreen" value="true"/><param name="allowScriptAccess" value="always"/><param name="salign" value=""/><param name="mode" value="slideshow"/><embed src="http://d.scribd.com/ScribdViewer.swf?document_id=13057812&amp;access_key=key-2msp2ghgo1uheil5pzpu&amp;page=1&amp;version=1&amp;viewMode=slideshow" quality="high" pluginspage="http://www.macromedia.com/go/getflashplayer" play="true" loop="true" scale="showall" wmode="opaque" devicefont="false" bgcolor="#ffffff" name="doc_718652791171161_object" menu="true" allowfullscreen="true" allowscriptaccess="always" salign="" type="application/x-shockwave-flash" align="middle" mode="slideshow" height="600" width="800"></embed></object>
			</slide>
			<slide>
				<title>Video and Audio on the Web</title>
				<ul>
					<li><link href="internet">Internet Protocols</link> only provide <em>best-effort connections</em></li>
					<ul>
						<li><em>Quality of Service (QoS)</em> requires end-to-end QoS provisioning</li>
						<li>QoS was never implemented on the Internet for economic reasons</li>
					</ul>
					<li>Data types and expectations co-evolve with the infrastructure</li>
					<ul>
						<li>faster processors and graphics chips can handle high-resolution video</li>
						<li>faster networks and better compression make high-resolution feasible</li>
					</ul>
					<li>Almost all data traffic will eventually move to an Internet</li>
					<ul>
						<li>TV and telephony are two very popular examples</li>
						<li>almost all telephony is handled on <q>a</q> (not <em>the</em>) Internet today anyway</li>
					</ul>
					<li>The <em>public Internet</em> and <em>an Internet</em> are not the same thing</li>
					<ul>
						<li>companies and the military often have separate networks</li>
						<li>using Internet technologies for building a network is cost-efficient</li>
						<li>security and economics decide how Internets are connected</li>
					</ul>
				</ul>
			</slide>
			<slide id="cdn">
				<title>Content Delivery Networks (CDN)</title>
				<ul>
					<li>High-volume traffic is better not routed from one place</li>
					<ul>
						<li><a href="http://www.google.com/">Google</a> and <a href="http://www.youtube.com/">YouTube</a> only look like <q>a site</q></li>
						<li>sophisticated routing and load balancing help handle traffic</li>
					</ul>
					<li><em href="http://en.wikipedia.org/wiki/Content_Delivery_Network">Content Delivery Networks (CDN)</em> are designed for high-volume low-latency delivery</li>
					<ul>
						<li>clients in different parts of the world will be served by different servers</li>
						<li>the internal data distribution and management is handled by the CDN</li>
					</ul>
					<li>CDNs are required when sites start handling large traffic volumes</li>
					<ul>
						<li>CDN services can be bought by site/service owners</li>
						<li><a href="http://www.akamai.com/">Akamai</a> and <a href="http://www.limelightnetworks.com/">Limelight</a> are two popular services</li>
					</ul>
					<li>CDNs are usually hidden by other technologies</li>
					<ul>
						<li>DNS responses for CDN hostnames are returned based on the request</li>
						<li>prepackaged video codecs for Flash/Silverlight have built-in CDN support</li>
					</ul>
				</ul>
			</slide>
			<slide id="cdn-routing">
				<title>CDN Request Routing</title>
				<img style="width : 90% ; margin : 2% ; " src="cdn-routing.jpg" title="CDN Request Routing" href="http://www.research.ibm.com/journal/sj/431/gayek.html"/>
			</slide>
			<slide id="audio">
				<title>Audio on the Web</title>
				<ul>
					<li>Audio is not very popular on the Web</li>
					<ul>
						<li>the Web is mostly visually oriented</li>
						<li>audio content without playback controls is not user-friendly</li>
						<li>most sites using multimedia use <link href="video">video</link> instead of audio</li>
					</ul>
					<li>Internet radios such as <a href="http://www.pandora.com/">Pandora</a> often use Flash</li>
					<ul>
						<li>they are standalone applications running in a browser</li>
						<li>content is often delivered via HTTP to circumvent firewalls</li>
					</ul>
					<li>Audio formats exist in many different variations</li>
					<ul>
						<li><em href="http://en.wikipedia.org/wiki/MP3">MPEG1 Layer 3 (MP3)</em> was the first widely supported audio format</li>
						<li><em href="http://en.wikipedia.org/wiki/Advanced_Audio_Coding">Advanced Audio Coding (AAC)</em> is <a href="http://en.wikipedia.org/wiki/FairPlay">Apple's preferred format because of DRM</a></li>
						<li>audio streaming formats often use much less bandwidth</li>
					</ul>
				</ul>
			</slide>
			<slide id="video">
				<title>Video on the Web</title>
				<ul>
					<li>Video formats have been evolving quickly for a while now</li>
					<ul>
						<li>video signals have a lot of redundancy that is hard to compute</li>
					</ul>
					<li>Depending on the application, algorithms ideally behave differently</li>
					<ul>
						<li>for playback of recorded content, encoding can be very expensive</li>
						<li>symmetric scenarios (such as video conferencing) better use symmetric codecs</li>
					</ul>
					<li>Handling video in <link href="plug-in"/> effectively implements dynamic codecs</li>
					<ol>
						<li><a href="http://www.youtube.com/">YouTube</a> started <a href="http://news.cnet.com/8301-10784_3-9817732-7.html">serving better quality a while ago</a></li>
						<li>the servers and the Flash plug-in have to be updated</li>
						<li>browsers reload the Flash code every time they load a YouTube page</li>
					</ol>
					<li>Video encoding combines time-enabled <link href="image-formats"/> and <link href="audio">audio</link></li>
					<ul>
						<li>both signals must be carefully synchronized</li>
						<li>sophisticated encodings use variable bitrates and even vary between video/audio rates</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Multimedia on the Web</title>
				<ul>
					<li>Images are the only supported media types on the Web</li>
					<li>Graphics and video and audio are not universally supported</li>
					<li>Image formats serve different purposes on the Web</li>
					<li>PNG for graphics and JPEG for photographic images</li>
					<li>GIF should be avoided (still required for animated images)</li>
				</ul>
			</slide>
        </part>
    </presentation>
	<presentation id="internet">
		<title short="Internet">Internet Architecture</title>
		<date>2009-10-29</date>
		<toc class="reading"><a href="http://www.acm.org/crossroads/xrds1-1/tcpjmy.html" title="ACM Crossroads: An Overview of the TCP/IP Protocol Suite">TCP/IP</a></toc>
		<toc class="resources"><a href="http://en.wikipedia.org/wiki/Category:Internet_architecture" title="Wikipedia: Internet Architecture">Internet Architecture</a>&#160;· <a href="http://www.garykessler.net/library/tcpip.html" title="An Overview of TCP/IP Protocols and the Internet">TCP/IP Overview</a>&#160;· <a href="http://www.zakon.org/robert/internet/timeline/" title="Hobbes' Internet Timeline">Timeline</a></toc>
		<toc class="abstract">The Internet is the technical infrastructure on top of which the Web is built. Some of the services provided by the Internet are essential for the Web, most importantly the naming service and the data transfer service. The <em>Domain Name System (DNS)</em> provides the human-readable names for computers, which can then be used in the addresses of Web servers and ultimately Web pages. The <em>Transmission Control Protocol (TCP)</em> provides the reliable data transfer service between Web Servers and Web Browsers, building on the very robust <em>Internet Protocol (IP)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="networks">
			<title>Computer Networks</title>
			<slide>
				<title>Network History</title>
				<ul>
					<li>First regarded as a convenient workaround for floppy disks</li>
					<ul>
						<li><q>real computer scientists write compilers</q></li>
						<li>the value of computer networks depends on their size</li>
					</ul>
					<li>Early networking solutions were vendor-specific islands</li>
					<ul>
						<li>DECnet for <em>Digital Equipment Corporation (DEC)</em> customers</li>
						<li>XNS for <em>Xerox</em> customers</li>
						<li>SNA for <em>IBM</em> customers</li>
						<li>transmitting data between these networks was very cumbersome</li>
					</ul>
					<li>Bridging networks transparently became increasingly important</li>
					<ul>
						<li>more computers and networks increase the benefit of interconnections</li>
						<li>layering being used for internetworks, not only for networks</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Networks vs. Internetworks</title>
				<ul>
					<li>Specific networks use specific abstractions</li>
					<ul>
						<li>how to address nodes (computers, phones, PDAs, RFID tags)</li>
						<li>how to address applications on these nodes</li>
						<li>how to transmit data to these applications</li>
					</ul>
					<li>Internetworks provide a network-independent abstraction</li>
					<ul>
						<li>nodes are addressed uniformly (IP addresses)</li>
						<li>applications are identified uniformly (ports)</li>
						<li>data transmission uses one set of protocols (TCP/UDP)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Internet</title>
				<ul>
					<li>Very early start and a lot of experience</li>
					<ul>
						<li>pragmatic and evolutionary approach</li>
						<li><q>if it's not broken, don't fix it</q></li>
					</ul>
					<li>Standardization by independent technical experts</li>
					<ul>
						<li>avoids the <q>designed by committee</q> effect of consortiums</li>
						<li>conservative and concentrating on stability</li>
						<li>implementations are required to prove technical feasibility</li>
						<li>simplicity whenever possible</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Internet Principles</title>
				<blockquote>Be liberal in what you accept, and conservative in what you send.</blockquote>
				<p class="quotenote"><a href="http://www.postel.org/postel.html">Jon Postel</a>, <a href="http://dret.net/rfc-index/reference/RFC1122">RFC 1122</a></p>
				<blockquote>Whenever possible, communications protocol operations should be defined to occur at the end-points of a communications system, or as close as possible to the resource being controlled.</blockquote>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/sal84">J. Saltzer, D. Reed, D. Clark, <q>End-to-end Arguments in System Design</q></a></p>
			</slide>
			<slide>
				<title>Internet Protocols</title>
				<img style="width : 90% ; margin : 2% ; " src="internet-protocols.png" title="Internet Protocol Layers"/>
			</slide>
			<slide>
				<title>Network Convergence</title>
				<img style="width : 90% ; margin : 2% ; " src="network-convergence.png" title="Convergence in the Protocol Stack"/>
			</slide>
		</part>
        <part id="ip">
			<title short="IP">Internet Protocol (IP)</title>
			<slide>
				<title>IP Features</title>
				<ul>
					<li>End-to-end data transfer (IP addresses)</li>
					<li>Hiding lower-level heterogeneity</li>
					<li>Connection-less (each packet routed individually)</li>
					<li>Unreliable (packets may be lost or duplicated)</li>
				</ul>
			</slide>
			<slide id="ip-address">
				<title>IP Address</title>
				<ul>
					<li>IP identifies nodes by an IP address</li>
					<li>IP addresses are globally unique (<a href="http://api.hostip.info/get_html.php?position=true">and can be geocoded</a>)</li>
					<li>IP uses 4 bytes for addresses (e.g., <code>128.32.226.29</code>)</li>
					<ul>
						<li>maximum number of addresses: 2<sup>32</sup> = 4 billion</li>
                        <li>IPv6 extends the address format to 16 bytes (2<sup>128</sup> ≈ 3.4 × 10<sup>38</sup> addresses)</li>
					</ul>
					<li>IP address shortage led to some trickery using IP addresses</li>
					<ul>
						<li><em href="http://en.wikipedia.org/wiki/Dhcp">Dynamic Host Configuration Protocol (DHCP)</em> is used to assign addresses on-demand</li>
						<li><em href="http://en.wikipedia.org/wiki/Network_address_translation">Network Address Translation (NAT)</em> uses one IP address for more than one device</li>
					</ul>
					<li>IP addresses are well-organized</li>
					<ul>
						<li>important for routing (i.e., sending packets to the target host)</li>
						<li>not ideally suited for mobile or ad-hoc networks</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="tcp">
			<title short="TCP">Transmission Control Protocol (TCP)</title>
			<slide>
				<title>TCP Features</title>
				<ul>
					<li>Flow-controlled (avoiding congestion)</li>
					<li>Reliable (no data lost or duplicated)</li>
					<li>Connection-oriented</li>
					<li>Application addressing</li>
				</ul>
			</slide>
			<slide>
				<title>Reliable Connections</title>
				<ul>
					<li>IP may drop or duplicate packets</li>
					<ul>
						<li>TCP adds serial numbers in data packets</li>
						<li>if problems are detected, TCP recovers automatically</li>
					</ul>
					<li>TCP avoids network congestion and system overload</li>
					<ul>
						<li><em>slow start</em> avoids flooding receivers with data they cannot process</li>
						<li><em>fast retransmit</em> for avoiding timeouts when losing data</li>
						<li>a <em>sliding window</em> for controlling the amount of outstanding packets</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>TCP Window</title>
				<img style="height : 70% ; margin : 2% ; " src="tcp-window.png"/>
			</slide>
		</part>
		<part id="dns">
			<title short="DNS">Domain Name System (DNS)</title>
			<slide>
				<title>Naming vs. Addressing</title>
				<ul>
					<li>IP addresses depend on network topology and organization</li>
					<ul>
						<li>reorganizing a network may change all IP addresses</li>
						<li>identifying important hosts should not be address-based</li>
					</ul>
					<li>Names are supposed to be more stable than addresses</li>
					<ul>
						<li>a name is an abstract identification of something</li>
						<li>names can be used to obtain more information</li>
					</ul>
					<li>Network services should use names instead of addresses</li>
					<ul>
						<li>before using the service, a mapping has to be performed</li>
						<li>the <em>Domain Name System (DNS)</em> is providing this service</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>DNS Properties</title>
				<ul>
					<li>DNS has a bootstrap problem</li>
					<ul>
						<li>DNS provides a service and should thus be identified by a name</li>
						<li>for resolving names into addresses, the DNS service is required</li>
					</ul>
					<li>DNS configuration is part of basic Internet configuration</li>
					<ul>
						<li><em>Dynamic Host Configuration Protocol (DHCP)</em> provides <link href="ip-address"/>, netmask, gateway, and DNS server address</li>
					</ul>
					<li>DNS names are hierarchically structured</li>
					<ul>
						<li><code>ischool.berkeley.edu</code>, <code>edu</code> is the <em>Top-Level Domain (TLD)</em></li>
						<li>TLDs are either <em>generic (gTLD)</em> or <em>country code (ccTLD)</em></li>
						<li>subdomains are federated (e.g., <code>edu</code>, <code>us</code>, <code>uk</code>, <code>tv</code>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Names Matter</title>
				<ul>
					<li>Names are not unique and namespaces are finite</li>
					<ul>
						<li>name disputes arise which were irrelevant before the Web</li>
						<li><q>cybersquatting</q> as a popular way to make money</li>
					</ul>
					<li>Names can be worth a lot of money</li>
					<ul>
                        <li><code>business.com</code> was sold for $7.5M in 1999 and again for $345M in 2007</li>
					</ul>
					<li>Name inflation can be used to generate money</li>
					<ul>
						<li><code>aero</code>, <code>biz</code>, <code>coop</code>, <code>info</code>, <code>jobs</code>, <code>mobi</code>, <code>museum</code>, <code>name</code>, <code>pro</code>, <code>travel</code></li>
						<li>starting 2009, <a href="http://dret.typepad.com/dretblog/2008/06/dret.html">user-defined top-level domains will go on sale</a></li>
					</ul>
					<li>Names can have political significance</li>
					<ul>
						<li>ccTLDs are assigned based on the UNO's idea of what a country is</li>
					</ul>
					<li>Names can have symbolic significance</li>
					<ul>
						<li>Catalonia managed to get a domain of its own (<code>cat</code>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Domain Name Space</title>
				<img style="height : 70% ; margin : 2% ; " src="dns-namespace.png"/>
			</slide>
			<slide>
				<title>DNS Namespace Organization</title>
				<ul>
					<li>Domain owners can organize the assignment of subdomains</li>
					<ul>
						<li><code href="http://www.berkeley.edu/">berkeley.edu</code> is a U.S. educational institution</li>
						<li><code href="http://www.ethz.ch/">ethz.ch</code> is a Swiss university</li>
						<li><code href="http://www.imperial.ac.uk/">imperial.ac.uk</code> is a British university</li>
						<li><code href="http://www.uts.edu.au/">uts.edu.au</code> is an Australian university</li>
					</ul>
					<li>Special rules may apply (Germany does not assign car license plate names)</li>
					<li>Organizations may be countries or companies</li>
					<ul>
						<li>countries have national organizations for assigning names</li>
						<li>companies may create an internal multi-level namespace (<code href="http://www.ischool.berkeley.edu/">www.ischool.berkeley.edu</code>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Using DNS</title>
				<ul>
					<li>DNS is used by virtually all Internet applications</li>
					<ul>
						<li>names are more stable than addresses</li>
					</ul>
					<li>E-mail has some dedicated features built into DNS</li>
					<ul>
						<li>special entries (<code>MX</code> records) identify the e-mail server for a domain</li>
						<li>fallback entries help dealing with failing e-mail servers</li>
					</ul>
					<li>most URIs are based on DNS names</li>
					<ul>
						<li><code>http://ischool.berkeley.edu/</code> identifies the access protocol and the host</li>
						<li>the browser first performs a DNS lookup</li>
						<li>a TCP connection is then established to the address returned by the DNS</li>
					</ul>
				</ul>
			</slide>
		</part>
		<slide>
			<title>Conclusions</title>
			<ul>
				<li>The Internet is a network of networks</li>
				<ul>
					<li><a href="http://en.wikipedia.org/wiki/Ted_Stevens">Ted Stevens</a>: <q><a href="http://www.youtube.com/watch?v=f99PcP0aFNE" title="Ted Stevens on the Internet">It's not a big truck, it's a series of tubes!</a></q></li>
				</ul>
				<li>The Internet provides basic data transfer services</li>
				<li>IP is used to send <em>datagrams</em> between end-points</li>
				<li>TCP is used for reliable communications (building on IP)</li>
				<li>DNS provides human-readable names for Internet hosts</li>
			</ul>
		</slide>
    </presentation>
    <presentation id="foundations">
		<title short="Foundations">Web Foundations (URI &amp; HTTP)</title>
		<date>2009-11-03</date>
		<toc class="reading"><a href="http://en.wikipedia.org/wiki/Http" title="Wikipedia: HTTP">HTTP</a>&#160;· <a href="http://www.w3.org/Provider/Style/URI" title="Cool URIs don't change">Cool URIs</a></toc>
		<toc class="resources"><a href="https://addons.mozilla.org/en-US/firefox/addon/3829" title="Firefox Add-on: Live HTTP Headers">Live HTTP Headers</a>&#160;· <a href="http://www.garshol.priv.no/download/text/http-tut.html" title="How the Web works: HTTP and CGI explained">HTTP and CGI</a>&#160;· <a href="http://tools.ietf.org/html/rfc3986" title="IETF RFC 3986: Uniform Resource Identifier (URI)">URI Spec</a>&#160;· <a href="http://tools.ietf.org/html/rfc2616" title="IETF RFC 2616: Hypertext Transfer Protocol (HTTP)">HTTP Spec</a></toc>
		<toc class="abstract">The Web's architecture has very simple principles revolving around the ideas of placing a heavy emphasis on a consistent and global identification mechanism for resources, a standardized way of how resource representations can be retrieved, and a standardized way of how resource representations should be usable by using standardized media types. Based on the Internet, the Web's transport protocol transmits representations of resources identified by a <em>Uniform Resource Identifier (URI)</em> between Web servers and clients. The most important protocol for data transfer on the Web is the <em>Hypertext Transfer Protocol (HTTP)</em>.</toc>
		<slide>
		<title>Abstract</title>
		<p class="abstract"><toc class="abstract"/></p>
        </slide>
		<slide>
			<title>Web Server Service</title>
			<ul>
				<li>Web servers do more than just <q>deliver files</q></li>
				<li>They receive a request for acting on a resource</li>
				<ul>
					<li>this may be a simple file retrieval</li>
					<li>additional information is available from the request's <link href="http-headers">header fields</link></li>
					<li>the request URI may contain additional <em>query information</em></li>
					<li>the request may transmit complex data (such as a form submission)</li>
				</ul>
				<li>Processing can mean anything, it is transparent for the client</li>
				<ul>
					<li>the result of processing yields a <em>resource representation</em></li>
					<li>in many cases, a Web server is just part of an application</li>
					<li>the <em>application server</em> is the application-specific logic</li>
				</ul>
			</ul>
		</slide>
		<part id="uri">
			<title short="URI">Uniform Resource Identifier (URI)</title>
			<slide>
				<title>Resource Identification</title>
				<ul>
					<li>The Web is centered around resources</li>
					<ul>
						<li>HTTP has been designed to manipulate resources</li>
						<li>HTTP provides methods for getting, putting, updating, and even deleting resources</li>
					</ul>
					<li>Resources are useful abstractions for interfaces</li>
					<ul>
						<li>instead of an API, interaction is built around manipulating resources</li>
						<li>APIs change and bind closely, documents can better withstand change and bind loosely</li>
						<li>the whole Web is built around resources, not APIs</li>
					</ul>
				</ul>
			</slide>
			<slide id="uri-schemes">
				<title>URI Schemes</title>
				<pre>URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]</pre>
				<pre>http://dret.net/lectures/web-fall09/foundations#uri-schemes</pre>
				<ul>
					<li>URIs in their general case are very simple</li>
					<ul>
						<li>the scheme identifies how resources are identified</li>
						<li>the identification may be hierarchical or non-hierarchical</li>
					</ul>
					<li>Many URI schemes are hierarchical</li>
					<ul>
						<li>it is then possible to use relative URIs such as in <htmel>a href="../"</htmel></li>
						<li>the slash character is not just a character, in URIs it has semantics</li>
					</ul>
					<li>Query components specify additional information</li>
					<ul>
						<li>it is non-hierarchical information further identifying the resource</li>
						<li>in most cases, it can be regarded as <q>input</q> to the resource</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="http">
			<title short="HTTP">Hypertext Transfer Protocol (HTTP)</title>
			<slide>
				<title>DNS &amp; HTTP</title>
				<p>The two basic protocols which every Web browser must implement are <link href="dns">DNS</link> access and <link href="http">HTTP</link>. However, most operating systems provide an API for DNS access, so the browser can use this service locally and only has to implement HTTP. <link href="tcp">TCP</link> (which is required as the foundation for HTTP) is usually provided by the operating system.</p>
				<img style="width : 90% ; margin : 2% ; " src="browser-dns-http.png"/>
			</slide>
			<slide>
				<title>The Web's Protocol</title>
				<img style="height : 60% ; margin : 4% ; float : left ; " src="internet-traffic-trends.png"/>
				<p class="quotenote">provided by <a href="http://www.cachelogic.com/">CacheLogic Inc.</a></p>
			</slide>
			<part>
				<title>HTTP Basics</title>
				<slide>
					<title>HTTP Messages</title>
					<ul>
						<li>HTTP needs a reliable connection</li>
						<ul>
							<li>the foundation for HTTP is the <link href="tcp"/></li>
							<li>DNS resolution yields an IP address</li>
							<li>open TCP connection to port 80 or port specified in URI (<code>http://rosetta.ischool.berkeley.edu:8085/</code>)</li>
						</ul>
						<li>HTTP is a <em>text-based</em> protocol</li>
						<ul>
							<li>the connection is used to transmit <em>text messages</em></li>
							<li>all HTTP messages are human-readable (not all <em>entities</em>, though)</li>
							<li>basic HTTP operations can be carried out by hand</li>
						</ul>
					</ul>
<pre>start-line
message-header *

message-body ?</pre>
				</slide>
				<slide id="http-headers">
					<title>HTTP Header Fields</title>
					<ul>
						<li>Header fields contain information about the message</li>
						<ul>
							<li><em>general header:</em> <code>Date</code> as the message origination date</li>
							<li><em>request header:</em> <code>Accept-Language</code> indicates language preferences</li>
							<li><em>response header:</em> <code>Server</code> contains system information</li>
							<li><em>entity header:</em> <code>Content-Type</code> specifies the media type of the entity</li>
						</ul>
						<li>HTTP defines <a href="http://www.cs.tut.fi/~jkorpela/http.html">a number of header fields</a></li>
						<ul>
							<li>unknown fields must be ignored (extensibility)</li>
							<li>unstandardized fields should use a <q><code>X-</code></q> prefix</li>
						</ul>
						<li>HTTP is about acting on these fields</li>
						<ul>
							<li>HTTP defines what HTTP implementations must or should do</li>
						</ul>
					</ul>
				</slide>
				<slide id="http-conneg">
					<title>HTTP Content Negotiation</title>
					<ul>
						<li><link href="http-headers"/> have interaction semantics</li>
						<ul>
							<li>depending on the <q>header type</q> they convey different information</li>
						</ul>
						<li><em href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec12.html">HTTP Content Negotiation</em> allows representation negotiation</li>
						<ul>
							<li>the client specifies a number of preferred content properties</li>
							<li>the server responds with the representation that <q>fits best</q></li>
						</ul>
						<li><em>Server-driven negotiation</em> can use a number of request header fields</li>
						<ul>
							<li><code>Accept</code> uses a list of <link href="mediatypes"/></li>
							<li><code>Accept-Charset</code> uses a list of character sets</li>
							<li><code>Accept-Encoding</code> uses a list of content encodings</li>
							<li><code>Accept-Language</code> uses a list of language codes</li>
							<li><code>User-Agent</code> specifies the client's identification</li>
						</ul>
						<li><em>Client-driven negotiation</em> lets the server send a list of URIs</li>
						<ul>
							<li>two steps are required to get the best alternate representation</li>
						</ul>
					</ul>
				</slide>
				<slide id="http-request">
					<title>HTTP Requests</title>
					<ul>
						<li>After opening a connection, the client sends a request</li>
						<ul>
							<li>the method indicates the action to be performed on the resource</li>
							<li>HTTP's most interesting methods are: <code>GET</code>, <code>HEAD</code>, <code>POST</code></li>
							<li>other interesting methods are: <code>PUT</code>, <code>DELETE</code></li>
						</ul>
						<li>The URI identifies the resource to which the request should be applied</li>
						<ul>
							<li>absolute URIs are required when contacting <em>proxies</em></li>
							<li>absolute paths are required when contacting a server directly</li>
							<li>the URI may contain <em>query information</em></li>
						</ul>
						<li>The <code>Host</code> header field must be included in every request</li>
					</ul>
<pre>Method Request-URI HTTP/Major.Minor
[Header]*

[Entity]?</pre>
				</slide>
				<slide id="http-get">
					<title>HTTP GET</title>
					<ul>
						<li>Retrieval action based on the URI</li>
						<ul>
							<li>may be implemented by reading a file</li>
							<li>may be implemented by processing a file (PHP)</li>
							<li>may be implemented by invoking a process</li>
						</ul>
						<li>Semantics may change based on header fields</li>
						<ul>
							<li><code>If-*:</code> only reply with the entity if necessary</li>
							<li><code>Range:</code> only reply with the requested part of the entity</li>
						</ul>
						<li>Cacheability depends on header fields of the response</li>
					</ul>
<pre>GET / HTTP/1.1
Host: ischool.berkeley.edu</pre>
				</slide>
				<slide id="http-response">
					<title>HTTP Responses</title>
					<ul>
						<li>The server's response to interpreting a request</li>
						<ul>
							<li>the status code is given numerically and as text</li>
							<li><code>2**</code> for variations of <q>ok</q></li>
							<li><code>3**</code> for redirections</li>
							<li><code>4**</code> are different client side problems (<code>404</code>: not found)</li>
							<li><code>5**</code> are different server side problems</li>
						</ul>
						<li>Header fields specify additional information</li>
						<ul>
							<li>information about the server</li>
							<li>information about the entity (media type, encoding, language)</li>
						</ul>
					</ul>
<pre>HTTP/Major.Minor Status-Code Text
[Header]*

[Entity]?</pre>
				</slide>
				<slide id="http-performance">
					<title>HTTP Performance</title>
					<ul>
						<li>HTTP/1.0 allowed one transaction per connection</li>
						<ul>
							<li>TCP connection setup and teardown are expensive</li>
							<li>TCP's <em>slow start</em> slows down the initial phase of data transfer</li>
							<li>typical Web pages use between 10 and 20 resources (HTML + images + CSS + scripts)</li>
							<li>typically, these resources are stored on the same server</li>
						</ul>
						<li>HTTP/1.1 introduces <em>persistent connections</em></li>
						<ul>
							<li>the TCP connection stays open for some time (10 sec is a popular choice)</li>
							<li>additional requests to the same server use the same TCP connection</li>
						</ul>
						<li>HTTP/1.1 introduces <em>pipelined connections</em></li>
						<ul>
							<li>instead of waiting for a response, requests can be queued</li>
							<li>the server responds as fast as possible</li>
							<li>the order may not be changed (there is no sequence number)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>HTTP Connection Handling</title>
					<img style="width : 90% ; margin : 2% ; " src="http-phttp-pipelining.png"/>
				</slide>
			</part>
			<part id="http-authentication">
				<title>HTTP Authentication</title>
				<slide>
					<title>HTTP Access Control</title>
					<ul>
						<li>HTTP servers can <a href="http://en.wikipedia.org/wiki/List_of_HTTP_status_codes#4xx_Client_Error">deny access</a> through access control</li>
						<ul>
							<li><code>401 Unauthorized</code> means the resource is access controlled</li>
							<li><code>403 Forbidden</code> means the resource is inaccessible</li>
							<li><code>405 Method Not Allowed</code> signals a request using the wrong <link href="http-request">request method</link></li>
						</ul>
						<li>Two different approaches to unauthorized access are possible</li>
						<ul>
							<li>repeat the HTTP request with the proper authentication credentials</li>
							<li>redirect to a <link href="login-page"/> and establish an authenticated <link href="session"/></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>HTTP Authentication</title>
					<img style="width : 90% ; margin : 2% ; " src="authentication-http.gif" title="HTTP Authentication" href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/Security5.html"/>
				</slide>
				<slide id="http-basic">
					<title>Basic HTTP Authentication</title>
					<ul>
						<li>Authentication is based on <em>authentication realms</em></li>
						<ul>
							<li>a set of resources for which the authentication is required</li>
							<li>an opaque name which is used to signal which login is required</li>
							<li>username/password often is specific for a given realm</li>
						</ul>
						<li>Users supply username and password through the client</li>
						<ul>
							<li>sent as <a href="http://en.wikipedia.org/wiki/Base64">Base64</a> encoded <q><code>username:password</code></q> string</li>
							<li>username and password are <a href="http://www.google.com/search?hl=en&amp;q=base64+decoder"><em>not</em> transmitted securely</a></li>
							<li>basic authentication should <em>always</em> use <link href="https">HTTPS</link></li>
						</ul>
						<li>Authorization is handled on the server side</li>
						<pre href="http://en.wikipedia.org/wiki/Basic_access_authentication">HTTP/1.0 401 Unauthorized
	WWW-Authenticate: Basic realm="SokEvo"</pre>
						<pre href="http://en.wikipedia.org/wiki/Basic_access_authentication">GET /private/index.html HTTP/1.0
	Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==</pre>
					</ul>
				</slide>
				<slide>
					<title>Repeated Access</title>
					<ul>
						<li>Clients typically access more than one protected resource</li>
						<ul>
							<li>a perfectly stateless client would always request authentication from the user</li>
							<li>using the <em>realm</em> clients can identify repeated accesses</li>
						</ul>
						<li>Web interactions by default are perfectly stateless</li>
						<ul>
							<li>each request is completely independent from other requests</li>
							<li>stateless interactions make the Web loosely coupled and scalable</li>
							<li>concepts like the <em>realm</em> or <link href="cookies"/> introduce <q>state</q></li>
						</ul>
						<li>Clients remember the authentication and replay it automatically</li>
						<ul>
							<li>browsers provide little control over this feature</li>
							<li><q>logging out</q> of HTTP authenticated sessions is hard</li>
						</ul>
					</ul>
				</slide>
				<slide id="login-page">
					<title>Login Page</title>
					<ul>
						<li><link href="http-basic"/> works with browser controls (including the window)</li>
						<ul>
							<li>no possibility to <q>log out</q> without using browser-specific controls</li>
							<li>client side security depends on browser security measures</li>
						</ul>
						<li>Using <link href="forms"/> gives more freedom in session management</li>
						<ul>
							<li><link href="authentication"/> and <link href="authorization"/> are completely application-based</li>
							<li>if there were <q>secure personal browsers</q> this would not work very well</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<slide>
			<title>Conclusions</title>
			<ul>
				<li>HTTP is much more than file transfer</li>
				<ul>
					<li>it is a protocol for the concept of <em>resource manipulation</em></li>
					<li>it is a distinct step away from the <em>API approach</em> to building distributed systems</li>
				</ul>
				<li>HTTP servers can be configured to deliver good or bad service</li>
				<ul>
					<li>this is a question of how well they are configured on the HTTP level</li>
					<li>it is also a question of how good the Web design is</li>
					<li>both issues together are required to set up a good Web server</li>
				</ul>
			</ul>
		</slide>
    </presentation>
    <presentation id="security">
        <title short="Security">Security &amp; Privacy</title>
        <date>2009-11-05</date>
        <toc class="reading"><a href="http://en.wikipedia.org/wiki/Internet_security" title="Wikipedia: Internet Security">Security</a>&#160;· <a href="http://en.wikipedia.org/wiki/Internet_privacy" title="Wikipedia: Internet Privacy">Privacy</a>&#160;· <a href="http://cacm.acm.org/magazines/2009/8/34494-browser-security/fulltext" title="Browser Security: Lessons from Google Chrome, Charles Reis, Adam Barth, Carlos Pizano, Communications of the ACM, Vol. 52 No. 8, Pages 45-49, August 2009">Browser Security</a></toc>
        <toc class="resources"><a href="http://support.mozilla.com/en-US/kb/Options+window" title="Firefox Options for Security and Privacy">Browser Options</a>&#160;· <a href="http://en.wikipedia.org/wiki/Https" title="Wikipedia: HTTPS">HTTPS</a>&#160;· <a href="http://tools.ietf.org/html/rfc2818" title="IETF RFC 2818: HTTP over TLS (HTTPS)">HTTPS Spec</a></toc>
        <toc class="abstract">TCP and thus HTTP are clear-text protocols, which make no attempt to hide the data being transmitted. For secure data transfers, it thus is necessary to use additional technologies for providing secure data transfers. For the Web, the most interesting security features are secure HTTP interactions, which are provided by <em>HTTP over SSL (HTTPS)</em>, a protocol that layers an encryption layer (SSL or TLS) between TCP and HTTP. For any task involving personalization and/or trust, it is not only necessary to have a concept for providing privacy, but also to have concepts for identity and how to prove identity, which needs authentication.</toc>
        <slide>
            <title>Abstract</title>
            <p class="abstract"><toc class="abstract"/></p>
        </slide>
 		<part id="security-concepts">
			<title>Security Concepts</title>
			<slide id="identification">
				<title>Identification</title>
				<ul>
					<li><em>Identity</em> is required for any non-anonymous communications</li>
					<ul>
						<li><em>groups</em> can have an identity (facebook members see more than non-members)</li>
						<li><em>pseudonyms</em> are <q>hidden identities</q> (the <q>real identity</q> is not visible)</li>
						<li><em>personal identity</em> should be tied to a person itself</li>
					</ul>
					<li><em>Proof of Identity</em> is important for any privileged operation</li>
					<ul>
						<li><em>signatures</em> and <em>seals</em> are traditional ways</li>
						<li>traditional ways are mostly protected by law (but not really safe)</li>
						<li>more modern ways often include technical methods for <link href="authentication"/></li>
					</ul>
					<li>Client identity on the Web can be bound in three ways:</li>
					<ol>
						<li>Computer (most of the time <q>identified</q> by an <link href="ip-address"/>)</li>
						<li>Browser (in the form of a stored <link href="cookies">cookie</link>)</li>
						<li>User (identified through some <link href="authentication">authentication method</link>)</li>
					</ol>
				</ul>
			</slide>
			<slide id="authentication">
				<title>Authentication</title>
				<ul>
					<li><em>Authentication</em> is the process of verifying an identity</li>
					<ul>
						<li>the weakest form of authentication is simply trust</li>
						<li>legal consequences can make it more risky to falsify authentication</li>
						<li>technical measures should make it hard to impossible to falsify authentication</li>
					</ul>
					<li>Authentication on the Web comes in many different flavors</li>
					<ul>
						<li>implicitly by accessing a server from some <link href="ip-address"/> range</li>
						<li>presenting a <link href="cookies">cookie</link> from a previous formal authentication</li>
						<li>presenting a password as a proof of identity</li>
						<li>proving that you own additional authentication hardware (often <a href="http://en.wikipedia.org/wiki/Personal_identification_number">PIN</a>-enabled)</li>
					</ul>
					<li>Risk and potential damage should justify authentication methods</li>
				</ul>
			</slide>
			<slide id="authorization">
				<title>Authorization</title>
				<ul>
					<li><em>Authorization</em> is the question of allowing operations</li>
					<ul>
						<li><link href="identification"/> is necessary to identify the initiator</li>
						<li><link href="authentication"/> is necessary to verify the initiator's identity</li>
						<li>if the initiator is authorized, the operation can be performed</li>
					</ul>
					<li>Web pages often are <em>public</em> or <em>restricted access</em></li>
					<ul>
						<li>public web pages do not require any identification (and thus authentication)</li>
						<li>restricted access Web pages can be group pages (internal company pages)</li>
						<li>personal access is another popular scenario (email, facebook, online banking)</li>
					</ul>
					<li>Web servers have well-defined ways of <link href="http-authentication">performing authentication</link></li>
				</ul>
			</slide>
		</part>
 		<part>
			<title>Browser Security &amp; Privacy</title>
			<slide>
				<title>Trust and Security on the Web</title>
				<ul>
					<li>Web-based applications introduce many risks</li>
					<ul>
						<li>do you trust your browser? (it may not safeguard your information)</li>
						<li>do you trust your computer? (it may have a virus)</li>
						<li>do you trust your network? (it may be monitored on various levels)</li>
						<li>do you trust the server? (it may be a fake <a href="http://en.wikipedia.org/wiki/Phishing">phishing</a> server)</li>
					</ul>
					<li>Most of these risks are amplified by the Web's scale</li>
					<ul>
						<li>phishing and spamming only work because the Web makes fraud more effective</li>
					</ul>
					<li>Controlling Web access is important for safe browsing</li>
					<ul>
						<li>trusting shared browsers is risky (they may store logins and cache pages)</li>
						<li>trusting the network can be risky (more and more networks are wire-tapped)</li>
						<li>trusting the server is risky (phishing and poor server security)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Privacy Options</title>
				<img src="firefox-options-privacy.png" style="height : 70% ; margin : 2% ; " title="Firefox Options: Privacy"/>
			</slide>
			<slide>
				<title>Security Options</title>
				<img src="firefox-options-security.png" style="height : 70% ; margin : 2% ; " title="Firefox Options: Security"/>
			</slide>
			<slide>
				<title>Encryption Options</title>
				<img src="firefox-options-encryption.png" style="height : 70% ; margin : 2% ; " title="Firefox Options: Encryption"/>
			</slide>
		</part>
 		<part id="security-101">
			<title>Security 101</title>
			<slide>
				<title>Cryptography</title>
				<ul>
					<li>Cryptography is structured into different layers</li>
					<ul>
						<li>layering is a well-established principle for <em>separation of concerns</em></li>
					</ul>
					<li><em>Cryptographic primitives</em> implement very basic functionality</li>
					<ul>
						<li>changes and advancements in this area are limited to very specialized researchers</li>
						<li>it is easy to make fatal mistakes which then challenge everything built on top of it</li>
					</ul>
					<li><em>Cryptographic protocols</em> assemble primitives into application-level solutions</li>
					<ul>
						<li>primitives solve very basic security problems (fingerprints, encryption, …)</li>
						<li>protocols combine these into applications (digital signatures, secure communications, …)</li>
					</ul>
				</ul>
			</slide>
			<slide id="one-way-function">
				<title>One-Way Function</title>
				<img style="width : 70% ; margin : 2% ; " src="hash.gif" title="Hash"/>
				<ul>
					<li>Hashes (or <em>message digests</em>) are well-known in computer science</li>
					<li>One-way functions are cryptographically safe hashes</li>
					<ul>
						<li>very hard to find an input producing a given output</li>
						<li>very hard to find two inputs producing the same output (<q>collision</q>)</li>
					</ul>
				</ul>
			</slide>
			<part id="secret-key">
				<title>Secret-Key Cryptography</title>
				<slide>
					<title>Plausible Encryption</title>
					<ul>
						<li>Secret-Key is what most people think of when thinking of encryption</li>
						<ul>
							<li><em>symmetric cryptography</em> is another popular term</li>
						</ul>
						<li>One key for encryption and decryption</li>
						<li>Revealing the key makes encrypted data openly readable</li>
						<ul>
                            <li>there must be a secure channel to transport keys, such as <a href="http://en.wikipedia.org/wiki/Diplomatic_bag">diplomatic pouches</a>.</li>
						</ul>
						<li>Good for long-term relationships with few partners</li>
						<ul>
							<li>exchange secret keys as part of the initial setup of a relationship</li>
							<li>adding partners requires a <em>secure channel</em> for key exchange</li>
							<li>changing keys requires a <em>secure channel</em> for key exchange</li>
						</ul>
						<li>Almost impractical in an environment with many ad-hoc partners</li>
					</ul>
				</slide>
				<slide>
					<title>Notice the Arrow</title>
					<img style="width : 90% ; margin : 2% ; " src="secret-key.gif" title="Secret-Key Cryptography"/>
				</slide>
			</part>
			<part id="public-key">
				<title>Public-Key Cryptography</title>
				<slide>
					<title>Implausible Encryption</title>
					<ul>
						<li>Public-Key intuitively is hard to accept as a concept</li>
						<ul>
							<li><em>asymmetric cryptography</em> is another popular term</li>
						</ul>
						<li>Key pairs of one public and one secret key</li>
						<ul>
							<li><em>key generation</em> is the process of generating these key pairs</li>
						</ul>
						<li>The public key can be made available to the public</li>
						<ul>
							<li>only the secret key can do the inverse operation of the public key</li>
						</ul>
						<li>Good for short-term relationships with many partners</li>
						<ul>
							<li>publish your public key so that it can be used worldwide</li>
							<li>everybody can encrypt data using the public key</li>
							<li>only the owner of the secret key can decrypt the message and read it</li>
						</ul>
						<li>Computationally expensive and not good for large amounts of data</li>
					</ul>
				</slide>
				<slide>
					<title>No Arrow Here …</title>
					<img style="width : 90% ; margin : 2% ; " src="public-key-secret-encrypt.gif" title="Public-Key Cryptography (Encrypting with Secret Key)"/>
				</slide>
			</part>
			<part id="crypto-protocols">
				<title>Cryptographic Protocols</title>
				<slide>
					<title>Building Secure Applications</title>
					<ul>
						<li><em>Cryptographic primitives</em> in most cases are not sufficient</li>
						<ul>
							<li>they provide basic functionality for fundamental tasks</li>
							<li>they must be combined to provide solutions for real-world problems</li>
						</ul>
						<li>Typical problem #1: How to ensure key authenticity</li>
						<ul>
							<li>with insecure keys, the majority of cryptographic methods is worthless</li>
						</ul>
						<li>Typical problem #2: How to communicate securely without shared keys</li>
						<ul>
							<li>many interesting scenarios are based on ad-hoc interactions</li>
							<li>secret-key does not work, public-key needs to verify the peer</li>
						</ul>
						<li>Typical problem #3: How to check authenticity and integrity of data</li>
						<ul>
							<li>integrity can be done with checksums, but these could be forged</li>
							<li>authenticity needs a cryptographically secure way of combining identity and data</li>
						</ul>
					</ul>
				</slide>
				<slide id="certificate">
					<title>Certificate</title>
					<ul>
						<li>Certificates are digital signatures issued by a trusted party</li>
						<ul>
							<li>most digital signatures are created with certified public keys</li>
							<li>this means the digital signature is created based on a digitally signed key</li>
						</ul>
						<li>Who can you trust on the Web?</li>
						<ul>
							<li>trust can only start to grow based on initial trust in something</li>
							<li>many systems come with pre-installed trust (<em>root certificates</em>)</li>
							<li>certificates from other issuers will cause <a href="https://katapultmedia.com/">browsers to complain</a></li>
						</ul>
						<li>Certificates (like domain names) are a very easy way to make money</li>
						<ul>
							<li>in theory there are different levels of certificates with different levels of identity checking</li>
							<li>in practice most sites choose the cheapest one that does not give an error message</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="https">
			<title short="HTTPS">HTTP over SSL (HTTPS)</title>
			<slide>
				<title>Secure Communications</title>
				<ul>
					<li><link href="public-key">Public-Key cryptography</link> is computationally expensive</li>
					<ul>
						<li>it is possible to encrypt all traffic using asymmetric key pairs</li>
						<li>this generates considerably more load on the server side</li>
					</ul>
					<li>Combining <link href="public-key">public-key</link> and <link href="secret-key">secret-key</link> cryptography</li>
					<ol>
						<li>check the public key for authenticity (using a <link href="certificate"/>)</li>
						<li>generate a key for a secret-key encryption scheme</li>
						<li>use the public key to securely transmit the secret key</li>
						<li>use the secret key for securely transmitting the payload</li>
					</ol>
					<li>Combines the advantages of both methods</li>
					<ul>
						<li>the lower complexity of secret-key algorithms</li>
						<li>the ability of public-key algorithms to work without a secure channel</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>HTTP and Security</title>
				<ul>
					<li>HTTP sends clear-text messages</li>
					<li>Making HTTP secure requires additional mechanisms</li>
					<li>Encryption is done by a layer on top of TCP</li>
					<ul>
						<li><em>Secure Sockets Layer (SSL)</em> is the protocol layer invented by Netscape</li>
						<li><em>Transport Layer Security (TLS)</em> is the standardized Internet version</li>
						<li>TLS adds more encryption schemes and more flexibility</li>
					</ul>
					<li>Lower-level methods may also provide encryption</li>
					<ul>
						<li><em>Virtual Private Networks (VPN)</em> provide IP-based encryption</li>
						<li><em>WEP</em> and <em>WPA</em> provide network interface encryption</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>HTTP and SSL</title>
				<img style="width : 90% ; margin : 2% ; " src="https.gif" title="HTTP and SSL"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Internet Security</title>
				<ul>
                    <li>Security is hard</li>
					<li>Certificates are used to guarantee a party's authenticity</li>
					<li>Certificates are digital signatures issued by trusted parties</li>
					<li>Once authenticated, public keys can be used to securely communicate</li>
					<li>Encryption on the Web is based on HTTPS</li>
				</ul>
			</slide>
		</part>
	</presentation>
    <presentation id="state">
        <title short="State">State Management</title>
        <date>2009-11-10</date>
        <toc class="reading"><a href="http://en.wikipedia.org/wiki/HTTP_cookie" title="Wikipedia about HTTP Cookies">Wikipedia</a></toc>
        <toc class="resources"><a href="http://www.w3.org/2001/tag/doc/state.html" title="State in Web Application Design">State</a>&#160;· <a href="http://dret.net/rfc-index/reference/RFC2965" title="Cookies RFC">Cookies Spec</a></toc>
        <toc class="abstract">HTTP is a stateless protocol, where each request/response interaction is a separate interaction and there is no protocol support for longer sessions (such as a user logging in and working on a Web site as an identified user). <em>State management</em> refers to mechanisms which provide support for this kind of scenario; <em>cookies</em> are the most popular choice for state management. Another possibility is URI-based state management. This lecture is a first glimpse into the world of <em>Representational State Transfer (REST)</em>, the Web's fundamental model of handling interaction with resources.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="session">
			<title>Session</title>
			<slide>
				<title>HTTP and Sessions</title>
				<ul>
					<li>HTTP has no session concept</li>
					<ul>
						<li>interactions are HTTP request/response pairs and not site visits</li>
						<li><link href="http-performance">HTTP/1.1</link> does not change this, it is only a performance optimization</li>
						<li>servers cannot reliably identify users interacting with a Web site</li>
					</ul>
					<li>Sessions should not be used to track resource state</li>
					<ul>
						<li>the semantics of resource interactions should not depend on client state</li>
						<li>application behavior can depend on client state</li>
					</ul>
					<li>HTTP's concept of <em>stateless interaction</em> is important</li>
					<ul>
						<li>the Web's idea is to use <em>loose coupling</em> between clients and servers/resources</li>
						<li>retrofitting the Web with <em>tight coupling</em> through server state is bad design</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Client-Side State</title>
				<ul>
					<li>Sessions should be maintained on the client</li>
					<ul>
						<li>the client has all relevant information about a session</li>
						<li>when the server restarts, no information will be lost</li>
						<li>if something has to be persistent, it should be a resource</li>
					</ul>
					<li>Small and short-term solutions may work well with server state</li>
					<ul>
						<li><em>scaling</em> these solutions typically introduces many problems</li>
						<li><em>debugging</em> can be hard because the state is transient</li>
						<li><em>integration</em> with other clients can become a difficult problem</li>
					</ul>
					<li>Three ways of client-side state are possible</li>
					<ol>
						<li>sending back and forth state as part of the interaction</li>
						<li>store state in the server and refer to it from the client (not recommended)</li>
						<li>store state at a URI and use the URI to refer to that state</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>State in HTML or HTTP</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-client-state.png" title="State in HTML or HTTP"/>
			</slide>
			<slide>
				<title>State in the Server Application</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-server-state.png" title="State in the Server Application"/>
			</slide>
			<slide>
				<title>State as a Resource</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-resource-state.png" title="State as a Resource"/>
			</slide>
			<slide>
				<title>Stateless Shopping</title>
				<ul>
					<li>Typical <q>session scenarios</q> can be <a href="http://www.peej.co.uk/articles/no-sessions.html">mapped to resources</a></li>
					<ul>
						<li>Client: Show me your products</li>
						<li>Server: Here's a list of all the products</li>
						<li>Client: I'd like to buy 1 of http://ex.org/product/X, I am "John"/"Password"</li>
						<li>Server: I've added 1 of http://ex.org/product/X to http://ex.org/users/john/basket</li>
						<li>Client: I'd like to buy 1 of http://ex.org/product/Y, I am "John"/"Password"</li>
						<li>Server: I've added 1 of http://ex.org/product/Y to http://ex.org/users/john/basket</li>
						<li>Client: I don't want http://ex.org/product/X, remove it, I am "John"/"Password"</li>
						<li>Server: I've removed http://ex.org/product/X from http://ex.org/users/john/basket</li>
						<li>Client: Okay I'm done, username/password is "John"/"Password"</li>
						<li>Server: Here is the total cost of the items in http://ex.org/users/john/basket</li>
					</ul>
					<li>This is more than just renaming <q>session</q> to <q>resource</q></li>
					<ul>
						<li>all relevant data is stored persistently on the server</li>
						<li>the shopping cart's URI can be used by other services for working with its contents</li>
						<li>instead of <em>hiding the cart in the session</em>, it is <em>exposed as a resource</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Reusing Resources</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-reusing-resource.png" title="Reusing Resources"/>
			</slide>
		</part>
		<part id="cookies">
			<title>Cookies</title>
			<slide>
				<title>Tracking Sessions</title>
				<ul>
					<li>Invented as a way to compensate for HTTP's lack of state</li>
					<ul>
						<li>application state is being sent to the client (<http>Set-Cookie2</http>)</li>
						<li>the client transmits application state in requests (<http>Cookie</http>)</li>
					</ul>
					<li>Cookies do not contain code that is executed</li>
					<ul>
						<li>some data that represents application state (by value or by reference)</li>
						<li>this data is stored by the client and returned to the server</li>
						<li>the client is not supposed to interpret the data in any way</li>
					</ul>
					<li>Cookies can be used in many different ways</li>
					<ul>
						<li>when used for tracking application state they are unproblematic</li>
						<li>when used for tracking resource state they introduce problems</li>
					</ul>
					<li>Cookies tightly bind clients to opaque concepts on the server</li>
				</ul>
			</slide>
			<slide>
				<title>Cookies for State Management</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-cookie-state.png" title="Cookies for State Management"/>
			</slide>
			<part>
				<title>Third-Party Cookie</title>
				<slide>
					<title>Advertising &amp; Privacy</title>
					<ul>
						<li>Big ad servers are digital hubs in the commercial Web</li>
						<ul>
							<li>consumers switch content providers but get the same ad provider</li>
							<li>tracking consumers <em>across</em> content providers is very valuable</li>
						</ul>
						<li>Cookies set by ad providers are sent very frequently</li>
						<ul>
							<li>each site that uses the ad provider triggers the cookies to be sent</li>
							<li>detailed profiling can be employed for creating consumer profiles</li>
						</ul>
						<li>Content and ad providers can cooperate for better profiling</li>
						<ul>
							<li>consumers log in to content providers and are reliably identified</li>
							<li>their personal profile can be matched with the ad provider's profile</li>
							<li>ad provider consolidation makes this scenario realistic</li>
						</ul>
					</ul>
				</slide>
			<slide>
				<title>Browsers Assemble Web Pages</title>
				<p>Typical Web resources (HTML pages) are assembled from a number of resources retrieved by HTTP. Any resource not originating on the server that is hosting the HTML page is considered a <q>third-party resource</q>. If the HTTP response for such a resource contains a cookie, it is a <q>third-party cookie</q>.</p>
				<img style="width : 90% ; margin : 2% ; " src="third-party-cookie.png" title="Third Party Cookie"/>
			</slide>
			</part>
		</part>
		<part>
			<title>Cookie-Less State Tracking</title>
			<slide>
				<title>Cookie Support</title>
				<ul>
					<li>Authentication can be tracked with <link href="http-authentication"/></li>
					<ul>
						<li>this is possible because authentication is built into HTTP</li>
					</ul>
					<li>Other session concepts are not supported by HTTP</li>
					<ul>
						<li>cookies have become the generic solution for all session tracking</li>
					</ul>
					<li>Cookies are increasingly limited by browsers</li>
					<ul>
						<li>cookies have gained some notoriety as privacy invaders</li>
						<li>browsers have more restrictive default settings</li>
						<li>an increasing number of users restricts cookie support</li>
					</ul>
					<li>Session-oriented Web sites often depend on cookies</li>
				</ul>
			</slide>
			<slide id="uri-rewriting">
				<title>URI Rewriting</title>
				<ul>
					<li><link href="cookies"/> are a piece of information stored on the client</li>
					<ul>
						<li>they are sent by the server as a result of a request</li>
						<li>they are returned by the browser in subsequent requests to the same site</li>
					</ul>
					<li>The same information can also be encoded in the URI</li>
					<ul>
						<li>normally a response contains a cookie and an HTML page</li>
						<li>the same effect is achieved when all links include the <q>cookie value</q></li>
						<li>this method often results in very long URIs</li>
					</ul>
					<li>Some Web application frameworks switch automatically</li>
					<ul>
						<li>J2EE checks for cookie support and switches to URI rewriting if required</li>
					</ul>
					<li>Problems with bookmarks and caches</li>
				</ul>
			</slide>
			<slide>
				<title>Hidden Form Fields</title>
				<ul>
					<li><link href="cookies"/> transmit session information via HTTP</li>
					<li><link href="uri-rewriting"/> encodes session information in URIs</li>
					<li><link href="forms"/> are a way to send data to a server</li>
					<ul>
						<li>in most cases this is data that is entered by the user</li>
					</ul>
					<li>Hidden form fields can be used to send data that is part of the HTML</li>
					<ul>
						<li>hidden form fields are never displayed to the user</li>
						<li>their predefined values are sent as part of the form submission</li>
					</ul>
					<li>Hidden form fields are essentially the same as <link href="uri-rewriting"/></li>
					<ul>
						<li>they can only be used if the interaction is based on forms</li>
						<li>they also require the Web page to be dynamically generated for each request</li>
						<li>the values end up as URI query string or request entity</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Session for Application State</title>
				<ul>
					<li>Sessions should only be used for application state</li>
					<li>Cookies are the best way to track sessions</li>
					<ul>
						<li>cookies should be self-contained rather than referential</li>
					</ul>
					<li>Alternative methods are URI rewriting and hidden form fields</li>
					<ul>
						<li>more robust than cookies but with unpleasant side-effects</li>
					</ul>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="rest">
        <title short="REST">Representational State Transfer (REST)</title>
        <date>2009-11-17</date>
        <toc class="assignment"><a href="a/6/">A6</a>&#160;assigned (due&#160;date:&#160;11/22)</toc>
        <toc class="reading"><a href="http://www.mulberrytech.com/Extreme/Proceedings/html/2002/Prescod01/EML2002Prescod01.html" title='P. Prescod, "Roots of the REST/SOAP Debate", Extreme Markup Languages Conference, August 2002'>REST vs. SOAP</a>&#160;· <a href="http://www.eioba.com/a69755/how_i_explained_rest_to_my_wife">What is REST?</a>&#160;· <a href="http://bitworking.org/news/193/Do-we-need-WADL">REST Interfaces</a></toc>
        <toc class="resources"><a href="http://rest.blueoxen.net/cgi-bin/wiki.pl">RESTwiki</a></toc>
        <toc class="abstract"><em>Representational State Transfer (REST)</em> is an architectural style for building distributed systems. The Web is an example of such a system. REST-style applications can be built using a wide variety of technologies. REST's main principles are those of resource-oriented states and functionalities, the idea of a unique way of identifying resources, and the idea of how operations on these resources are defined in terms of a single protocol for interacting with resources. REST-oriented system design leads to systems which are open, scalable, extensible, and easy to understand.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>The Web as a System</title>
			<ul>
				<li>The Web is one distributed hypermedia system</li>
				<ul>
					<li>the main architectural components are URIs, HTTP, and HTML</li>
					<li>all other Web technologies are built on that foundation</li>
					<li>if they are not, they are very likely not well-designed Web technologies</li>
				</ul>
				<li>The Web is amazingly open, scalable, extensible, and easy to understand</li>
				<ul>
					<li><em>openness</em> allows new technologies to be introduced</li>
					<li><em>scalability</em> ensures that the system does not contain bottlenecks</li>
					<li><em>extensibility</em> allows the Web to evolve without any redesign of existing parts</li>
					<li><em>simplicity</em> makes sure that the system survives and evolves</li>
				</ul>
				<li>No other information system gets even close to the Web</li>
				<ul>
					<li>but not all information system designs can accept the Web's limitations</li>
					<li>REST should be seen as a guideline for how to build a true Web application</li>
					<li>other applications will continue to be built using other approaches</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Web System Design</title>
			<blockquote>There are two ways of constructing a software design: One way is to make it so simple that there are <em>obviously</em> no deficiencies, and the other way is to make it so complicated that there are no <em>obvious</em> deficiencies. The first method is far more difficult.</blockquote>
			<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Charles_Antony_Richard_Hoare">C. A. R. Hoare</a>, <a href="http://dret.net/biblio/reference/hoa81"><q>The Emperor's Old Clothes</q>, 1980 Turing Award Lecture</a></p>
		</slide>
		<part>
			<title>Technologies and Implementations</title>
			<slide>
				<title>Object-Orientation</title>
				<ul>
					<li>Object-Orientation is a <em>Software Engineering Style</em></li>
					<ul>
						<li>it can be applied to any programming language</li>
						<li>depending on the language, this is more or less easy</li>
						<li>OO languages support or even enforce certain design patterns</li>
						<li><em>spaghetti code</em> can be written in every programming language</li>
					</ul>
					<li>Implementations can always be bad or good</li>
					<ul>
						<li>the quality of the implementation depends on the programmer</li>
						<li>programmers can be better supported and controlled with an OO language</li>
						<li>implementation quality metrics must be based on the product, not the language</li>
					</ul>
					<li>Good programmers always produce good code</li>
					<li>Bad programmers always produce bad code</li>
					<li>Average programmers need good tools to produce good code</li>
				</ul>
			</slide>
			<slide>
				<title>Technologies are Tools</title>
				<ul>
					<li>Technologies help solve problems</li>
					<ul>
						<li>they are built with certain goals in mind</li>
						<li>they specialize in solving a problem <em>in a specific way</em></li>
					</ul>
					<li>Technology choices are very important</li>
					<ul>
						<li>the technology (i.e., the tool) shapes the way a problem is solved</li>
						<li>working <q>against</q> the tool is possible, but hard and rarely done</li>
					</ul>
					<li>Technologies sometimes cloud the more important issues</li>
					<ol>
						<li>the problem must be well-defined and fully understood</li>
						<li>the general approach to solve a problem must be identified</li>
						<li>finally, a technology supporting this approach must be chosen</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Implementations are Products</title>
				<ul>
					<li>Implementations are built on (and shaped by) technologies</li>
					<li>Implementation = Concepts + Technologies</li>
					<li>Implementation quality depends on both factors</li>
					<ul>
						<li>the right choice of technologies is very important</li>
						<li>the responsible use of that foundation is equally important</li>
					</ul>
					<li>Good REST is as hard to grasp as good OO</li>
					<ul>
						<li>products may claim that they are REST/OO</li>
						<li>they may even use technologies which support REST/OO</li>
						<li>only careful inspection reveals the truth of this claim</li>
						<li>in most cases, this only happens when the product needs to be changed</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="rest-principle">
			<title>REST Principles</title>
			<slide>
				<title>Definition</title>
				<ul>
					<li>Resources are defined by URIs</li>
					<li>Resources are manipulated through their representations</li>
					<li>Messages are self-descriptive and stateless</li>
					<li>There can be multiple representations for a resource</li>
					<li>Application state is driven by resource manipulations</li>
				</ul>
			</slide>
			<slide>
				<title>Resources</title>
				<ul>
					<li>Resources are defined by URIs</li>
					<ul>
						<li>resources can never be accessed or manipulated directly</li>
						<li>REST works with resource representations</li>
					</ul>
					<li>Resources are all the things we want to work with</li>
					<ul>
						<li>if you cannot name something, you cannot do anything with it</li>
						<li>documents are a popular resource type on the Web</li>
						<li>documents usually are a structured collection of information</li>
					</ul>
					<li>Documents are abstract concepts of descriptive resources</li>
					<ul>
						<li>they may be used in different contexts (e.g., formats)</li>
						<li>different applications may be interested in different representations</li>
						<li>the underlying resource is always the same</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>State</title>
				<ul>
					<li>State is represented as part of the content being transferred</li>
					<ul>
						<li>server interruptions do not create problems for the client</li>
						<li>it is possible to switch between servers for different interactions</li>
						<li>clients can simply store the representation to save the state</li>
					</ul>
					<li>State transfer makes the system scalable</li>
					<ul>
						<li>data transfer is not state-specific (no stateful connection handling)</li>
						<li>state is transferred between client and server</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Establishing a Common Model</title>
				<ul>
					<li>Distributed systems must be based on a shared model</li>
					<ul>
						<li>traditional systems must agree on a common API</li>
						<li>REST systems structure agreement into three areas</li>
					</ul>
					<li>REST is built around the idea of simplifying agreement</li>
					<ul>
						<li><em>nouns</em> are required to name the resources that can be talked about</li>
						<li><em>verbs</em> are the operations that can be applied to named resources</li>
						<li><em>content types</em> define which information representations are available</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Nouns</title>
				<ul>
					<li>Nouns are the names of resources</li>
					<ul>
						<li>in most designs, these names will be URIs</li>
						<li>URI design is a very important part of a REST-based system design</li>
					</ul>
					<li>Everything of interest should be named</li>
					<ul>
						<li>by supporting well-designed names, applications can talk about named things</li>
						<li>new operations and representations can be introduced</li>
					</ul>
					<li>Separating nouns from verbs and representations improves extensibility</li>
					<ul>
						<li>applications might still work with resources without being able to process them</li>
						<li>introducing new operations on the Web does not break the Web</li>
						<li>introducing new content types on the Web does not break the Web</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Verbs</title>
				<ul>
					<li>Operations which can be applied to resources</li>
					<li>The core idea of REST is to use <em>universal verbs</em> only</li>
					<ul>
						<li>universal verbs can be applied to all nouns</li>
					</ul>
					<li>For most applications, HTTP's basic methods are sufficient</li>
					<ul>
						<li><http>GET</http>: Fetching a resource (there must be no side-effects)</li>
						<li><http>PUT</http>: Transfers a resource to a server (overwriting if there already is one)</li>
						<li><http>POST</http>: Adds to an existing resource on the server</li>
						<li><http>DELETE</http>: Discards a resource (its name cannot be used anymore)</li>
					</ul>
					<li>Corresponding to the most popular basic database operations</li>
					<ul>
						<li>CRUD: Create, Read, Update, Delete</li>
					</ul>
				</ul>
			</slide>
			<slide id="http-post">
				<title><http>POST</http>ing</title>
				<ul>
					<li><http>POST</http> adds to a resource instead of performing an overwriting update</li>
					<li><http>POST</http> can have different effects</li>
					<ul>
						<li>by <http>POST</http>ing, state is changed and a new resource is created</li>
						<li>by <http>POST</http>ing, only the existing resource is changed</li>
						<li>the server signals the difference using HTTP responses (<http>200 OK</http> or <http>201 Created</http>)</li>
					</ul>
					<li>This is a <em>design choice</em></li>
					<ul>
						<li>if the added information needs to be accessible individually, create a new resource</li>
						<li>for changes of an existing resource, no new resource has to be created</li>
					</ul>
					<li>Make sure that resources are navigable using URIs</li>
					<ul>
						<li>if appropriate, a relationship can be represented in the resource format</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Content Types</title>
				<ul>
					<li>Representations should be machine-processable</li>
					<ul>
						<li>they don't have to be, they may be opaque to applications</li>
						<li>in many cases, machine-processable representations are advantageous</li>
					</ul>
					<li>Resources are abstractions, REST passes representations around</li>
					<ul>
						<li>resources can have various representations (i.e., content types)</li>
						<li>clients can request content types they are interested in</li>
					</ul>
					<li>Adding or changing content types does not change the system architecture</li>
					<ul>
						<li>different clients and servers support different content types</li>
						<li><link href="http-conneg"/> allows content types to be negotiated dynamically</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>REST vs. <q>Web Services</q></title>
				<ul>
					<li>REST is a description of the Web's design principles</li>
					<ul>
						<li>it is not something new, it is simply a systematic view of the Web</li>
						<li>REST's claim is to be able to learn from the Web's success</li>
					</ul>
					<li>Web Services (the SOAP flavor) do not build on REST</li>
					<ul>
						<li>they use HTTP as a transport protocol</li>
						<li>they re-create Web functionality through additional specifications (WS-*)</li>
						<li>they have been built by programmers using a top-down approach</li>
					</ul>
					<li>REST and Web Services have different design approaches</li>
					<ul>
						<li>REST starts at the resources and takes everything from there</li>
						<li>Web Services focus on messages, which in most cases are operations</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="rest-implementation">
			<title>REST Implementation</title>
			<slide>
				<title>REST Technologies</title>
				<ul>
					<li>REST is not tied to a particular set of technologies</li>
					<ul>
						<li><link href="rest-uri"/> are the most common choice for nouns</li>
						<li><link href="rest-http"/> methods are the most common choice for verbs</li>
						<li><link href="rest-xml"/> is the most common choice for content types</li>
					</ul>
					<li>Choosing other technologies should have a very good reason</li>
					<ul>
						<li>building a REST system should make it open and accessible</li>
						<li>technology choices are as important as architectural choices</li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-uri">
				<title>URIs</title>
				<ul>
					<li>REST requires a lot of URI design</li>
					<ul>
						<li>instead of being generated as a side-effect, they are the core of the system</li>
					</ul>
					<li>Designing URIs and starting from them is a new way of thinking</li>
					<ul>
						<li>URIs are much more powerful than just being an address of a Web page</li>
					</ul>
					<li>URIs are names for concepts</li>
					<ul>
						<li>concepts are never transmitted, only their representation</li>
						<li>having to focus on concepts rather than representations is helpful</li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-http">
				<title>HTTP</title>
				<ul>
					<li>HTTP is the most successful RESTful protocol</li>
					<ul>
						<li>HTTP's author Roy Fielding coined the term <q>REST</q> in his <a href="http://dret.net/biblio/reference/fie00">Ph.D. thesis</a></li>
					</ul>
					<li>HTTP should be regarded as an <q>application-level protocol</q></li>
					<ul>
						<li>Web Service technologies use HTTP as a transport protocol</li>
						<li>HTTP has much more to offer than a firewall-penetrating pipe</li>
					</ul>
					<li>Web infrastructure is built around proper HTTP usage</li>
					<ul>
						<li>caching is built into HTTP and caches optimize the Web transparently</li>
						<li>authentication can be done using HTTP's authentication methods</li>
						<li>secure data transfer can be done using <link href="https"/></li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-xml">
				<title>XML</title>
				<ul>
					<li>URI-identified resources are abstract concepts</li>
					<ul>
						<li>for machine-based processing, XML is a good representation</li>
						<li>for human-oriented interactions, HTML probably is a better choice</li>
					</ul>
					<li>Connections to other resources must be done by URI</li>
					<ul>
						<li>XML does not make built-in assumptions about identifiers</li>
						<li>but it does support URIs, for example with <em>XInclude</em> and <em>XML Base</em></li>
						<li>RESTful applications are about navigating a Web of URI-identified resources</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Better Services</title>
				<ul>
					<li>REST is an architectural style</li>
					<ul>
						<li>URI/HTTP/HTML/XML may be replaced</li>
						<li>the general principle of resource-based interaction remains valid</li>
					</ul>
					<li>RESTful system designs can create better systems</li>
					<ul>
						<li>a little bit more design effort in the beginning</li>
						<li>a lot fewer headaches later</li>
					</ul>
					<li>SOA often is not really RESTful</li>
					<ul>
						<li>SOA often focuses on operations</li>
						<li>REST focuses on resources</li>
					</ul>
					<li>RESTful design is a good starting point for OO implementations</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="semweb">
        <title>Semantic Web</title>
        <date>2009-11-19</date>
		<toc class="reading"><a href="http://microformats.org/" title="microformats.org">Microformats</a>&#160;· <a href="http://www.google.com/search?q=%22Which%20Semantic%20Web?%22+Catherine+C.+Marshall+Frank+M.+Shipman" title='Catherine C. Marshall, Frank M. Shipman, "Which Semantic Web?", pp. 57-66, Proceedings of the 14th ACM Conference on Hypertext and Hypermedia, ACM Press, Nottingham, UK, August 2003'>Which Semantic Web?</a></toc>
        <toc class="resources"><a href="http://www.w3.org/TR/xhtml-rdfa-primer/" title="RDFa Primer 1.0">RDFa</a>&#160;· <a href="http://www.w3.org/2001/sw/SW-FAQ" title="W3C Semantic Web FAQ">FAQ</a>&#160;· <a href="http://www.w3.org/TR/rdf-primer/" title="W3C RDF Primer">RDF</a>&#160;· <a href="http://www.w3.org/TR/owl-features/" title="W3C OWL Overview">OWL</a></toc>
        <toc class="abstract">The <em>Semantic Web</em> can either be understood as a prepackaged set of languages and technologies for representing semantics and working with them, or as a more general idea of <q>Web Semantics</q>, which instead of predefining certain languages and technologies just looks at the various options of how more semantics can be represented on the Web. Taking the latter approach, this lecture looks at the various ways in which semantics can be introduced on the Web, and what is required in these scenarios in terms of technology and information sharing.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Information About the Web</title>
			<slide>
				<title>Web Resources</title>
				<ul>
					<li>Every Web resource is identified by a URI</li>
					<li>Resources can be self-contained or represent real-world objects/concepts</li>
					<ul>
						<li>A Web page with a commentary on it <em>is</em> the commentary</li>
						<li>A home page may <em>represent</em> a person and not just an abstract home page</li>
						<li>many philosophical questions about identity, equality, and ontology</li>
					</ul>
					<li>Practically speaking, many Web resources have <q>implicit links</q></li>
					<ul>
						<li>technical limitations (finding <a href="http://www.google.com/search?q=link%3Awww.ischool.berkeley.edu">inbound links</a> is hard)</li>
						<li>linking between pages about the same resource</li>
						<li>linking between <q>similar resources</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Resources on the Web</title>
				<img src="web-resources.png" style="height : 70% ; margin : 2% ; " title="Linked Resources on the Web"/>
			</slide>
			<part>
				<title>Describing Resources with Microformats</title>
				<slide>
					<title>Metadata as Markup Overlay</title>
					<img src="web-resources-microformats.png" style="height : 70% ; margin : 2% ; " title="Embedding Microformats in Web Resources"/>
				</slide>
				<slide>
					<title>Surfacing Concepts</title>
					<ul>
						<li><link href="microformats"/> are embedded into HTML Web content</li>
						<ul>
							<li>there is no need for alternative representations</li>
							<li>machines use the same resources as humans (and extract microformat data)</li>
						</ul>
						<li>Many larger Web sites are using structured back-ends</li>
						<ul>
							<li>for pure HTML publishing, semantics are translated into HTML/CSS</li>
							<li>more semantics can be represented by also producing microformats</li>
						</ul>
						<li>Microformats can also help in decentralized service scenarios</li>
						<ul>
							<li>centralized: data has to be submitted to a hub</li>
							<li>decentralized: crawlers search for microformats and use what they find</li>
							<li>advantage of decentralization: loose coupling and independent control</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Publishing Resources as XML</title>
				<slide>
					<title>Different Representations</title>
					<img src="web-resources-xml.png" style="height : 70% ; margin : 2% ; " title="Publishing Resources as XML"/>
				</slide>
				<slide>
					<title>XML as Custom Markup</title>
					<ul>
						<li><link href="xml">XML</link> can represent arbitrary (textual) data</li>
						<ul>
							<li>works very well for tree-structured and document-style data</li>
							<li>works not so well for graph-like data with no inherent order</li>
						</ul>
						<li>XML only defines a <em>syntax for representing ordered trees</em></li>
						<ul>
							<li>what the elements and attributes mean requires agreement on the vocabulary</li>
							<li>XML is a good tool for data exchange, but only for the syntax part</li>
						</ul>
						<li>Agreement can be based on three different approaches</li>
						<ol>
							<li>everybody always uses a universally useful vocabulary (XHTML)</li>
							<li>user (groups) agree on vocabularies based on mutual interest</li>
							<li>user (groups) agree on modules and build vocabularies with these modules (<a href="http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=ubl">UBL</a>)</li>
						</ol>
					</ul>
				</slide>
			</part>
			<part>
				<title>Transforming Resources into RDF</title>
				<slide>
					<title>Translation into a Universal Metamodel</title>
					<img src="web-resources-rdf-per-resource.png" style="height : 70% ; margin : 2% ; " title="GRDDL for Transforming Markup into RDF"/>
				</slide>
				<slide>
					<title>The Semantic Web Vision</title>
					<ul>
						<li><link href="rdf">RDF</link> is a more general model for structured data</li>
						<ul>
							<li>it is very fine-granular and can represent (almost) everything</li>
							<li>its granularity becomes problematic in scenarios with coarse granularity</li>
						</ul>
						<li>Transforming resources can be done in two basic ways</li>
						<ol>
							<li>using well-defined mappings between HTML and RDF (<a href="http://www.w3.org/TR/grddl-primer/" title="Gleaning Resource Descriptions from Dialects of Languages">GRDDL</a>)</li>
							<li>extracting information by analysis such as <a href="http://en.wikipedia.org/wiki/Natural_language_processing" title="Natural Language Processing">NLP</a></li>
						</ol>
						<li>Transforming resources does not require cooperation of the service</li>
						<ul>
							<li>any service can be crawled and the resources can be transformed</li>
						</ul>
						<li>Transforming resources does not allow access to <q>all data</q></li>
						<ul>
							<li>crawling can have limits and thus the RDF data is limited as well</li>
							<li>with <em>everything as RDF</em> the complete RDF graph could be queried</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Transforming Services into RDF</title>
				<slide>
					<title>Bypassing Web Publishing</title>
					<img src="web-resources-rdf-per-service.png" style="height : 70% ; margin : 2% ; " title="Using Structured Data Sources"/>
				</slide>
				<slide>
					<title>Trillions of Triples</title>
					<ul>
						<li>Turn the structured data from the back-end into RDF</li>
						<ul>
							<li>the complete dataset can be transformed in one processing step</li>
							<li>relationships between resources may be better preserved than by crawling</li>
							<li>keeping the snapshot current can become a significant problem</li>
						</ul>
						<li>With everything in one database (a <em href="http://en.wikipedia.org/wiki/Triplestore">triple store</em>) queries become possible</li>
						<ul>
							<li><em>SPARQL queries can query the graph of complete service data</em></li>
							<li>large datasets easily translate into several billion triples</li>
							<li>triple store implementation moves to native RDF databases</li>
						</ul>
						<li>Large triple stores can process complex SPARQL queries</li>
						<ul>
							<li>performance is a problem with large datasets</li>
							<li>RDF's simplicity bites back in the form of <em href="http://en.wikipedia.org/wiki/Reification_%28computer_science%29#Reification_on_Semantic_Web">reification</em></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="conclusions">
			<title>Conclusions</title>
			<slide>
				<title>Exposing Structured Data</title>
				<ul>
					<li>Exposing semantics makes data more valuable</li>
					<li>Various technologies and approaches can be chosen</li>
					<li>What are the economics behind allowing/supporting reuse?</li>
					<li>Different implementations fit different scenarios</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="architecture">
        <title short="Web Architecture">Architecture of the World Wide Web</title>
        <date>2009-11-24</date>
        <toc class="assignment"><a href="a/7/">A7</a>&#160;assigned (due&#160;date:&#160;12/6)</toc>
        <toc class="reading"><a href="http://www.martinfowler.com/ieeeSoftware/whoNeedsArchitect.pdf" title='Martin Fowler, "Who Needs an Architect?," IEEE Software, vol. 20,  no. 5,  pp. 11-13,  Sept/Oct 2003'>Architecture?</a>&#160;· <a href="http://www.w3.org/TR/webarch/summary.html" title="W3C Web Architecture Specification Summary">Architecture Summary</a></toc>
        <toc class="resources"><a href="http://www.w3.org/TR/webarch/" title="W3C Web Architecture Specification">Architecture</a></toc>
        <toc class="abstract">The Web's architecture has very simple principles revolving around the ideas of <em>placing a heavy emphasis on a consistent and global identification mechanism for resources</em>, a <em>standardized way of how resource representations can be retrieved</em>, and a <em>standardized way of how resource representations should be usable by using standardized media types</em>. This lecture presents an overview of these architectural principles and illustrates them by using blogs as an example of Web-based applications.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Today's Reading</title>
			<blockquote><a href="http://www.w3.org/TR/webarch/summary.html">Summary</a> of <a href="http://www.w3.org/TR/webarch/">Ian Jacobs, Norman Walsh, <q>Architecture of the World Wide Web, Volume One</q>, World Wide Web Consortium, Recommendation REC-webarch-20041215, December 2004</a></blockquote>
			<ul>
				<li>Examples (or counter-examples) for the following principles, practices, and constraints:</li>
				<ul>
					<li>URIs identify a single resource (versioning, aliases)</li>
					<li>URI opacity (assuming a specific resource representation)</li>
					<li>Available representations (XML namespaces as really bad example)</li>
					<li>Hypertext links (resource representations should be good Web citizens)</li>
					<li>Orthogonality (identification, interaction, and representation are orthogonal)</li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Parsimony</title>
			<slide>
				<title>Keep It Simple</title>
				<ul>
					<li>Loose coupling vs. tight coupling</li>
					<ul>
						<li>fewer requirements for cooperation mean fewer potential sources of problems</li>
						<li>taking independent developments into consideration (graceful degradation)</li>
					</ul>
					<li>Parsimony may conflict with optimization</li>
					<ul>
						<li>a fully backlinked Web would be a very different hypermedia system</li>
						<li>modifying resources would be expensive and require considerable efforts</li>
						<li>an uncontrolled Web allows failure and innovative development</li>
					</ul>
					<li>Programming Languages vs. Frameworks</li>
					<ul>
						<li>programming languages are very simple and very powerful</li>
						<li>frameworks are more complex and have some choices built into them</li>
						<li>both can be used to build good systems</li>
						<li>framework applications are more likely to not do really innovative things</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Web Design as System Design</title>
				<blockquote>There are two ways of constructing a software design: One way is to make it so simple that there are <em>obviously</em> no deficiencies, and the other way is to make it so complicated that there are no <em>obvious</em> deficiencies. The first method is far more difficult.</blockquote>
				<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Charles_Antony_Richard_Hoare">C. A. R. Hoare</a>, <a href="http://dret.net/biblio/reference/hoa81"><q>The Emperor's Old Clothes</q>, 1980 Turing Award Lecture</a></p>
				<ul>
					<li>Web: URI + HTTP + HTML ( + XML)</li>
					<li>OASIS: <a href="http://www.infoworld.com/article/07/08/09/sca-oasis_1.html">Six SOA simplification committees</a> for <a href="http://en.wikipedia.org/wiki/List_of_Web_service_specifications">about 60 WS-* specs</a></li>
				</ul>
			</slide>
			<slide>
				<title>Technology Blinders</title>
				<ul>
					<li>Web architecture is an additional set of constraints</li>
					<ul>
						<li>it is not a very complicated set of constraints</li>
						<li>but it still makes life more complicated than in an unconstrained world</li>
						<li>it may require a major redesign of an application</li>
					</ul>
					<li>Technology providers sometimes ignore Web architecture</li>
					<ul>
						<li>multimedia presentation concepts are often disconnected from the Web</li>
						<li>hypermedia researchers often regard the Web as inferior (or not as hypermedia at all)</li>
						<li>questions of client capabilities are often ignored (or brushed aside using statistics)</li>
					</ul>
					<li>Integration vs. Transport</li>
					<ul>
						<li>integrating into the Web requires applications to conform to Web architecture</li>
						<li>sitting on top of the Web just requires using HTTP for data transfer</li>
						<li>many <q>Web Technologies</q> are <em>not</em> integrated into the Web</li>
						<li>many <q>Web Applications</q> are <em>not</em> integrated into the Web</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Principles</title>
			<slide>
				<title>Identification</title>
				<ul>
					<li>Everything should be identified in a uniform way</li>
					<li>Identification and access methods evolve over time</li>
					<ul>
						<li><uri>sms:</uri> and <uri>callto:</uri> did not exist when the Web was created</li>
					</ul>
					<li>Identification and access support evolve over time</li>
					<ul>
						<li><uri>tel:</uri> now can be supported by an increasing number of clients</li>
					</ul>
					<li>The Web is one huge proof for the power of <em>network effects</em></li>
					<ul>
						<li>it also is a lesson for many who did not take it seriously and failed</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Interaction</title>
				<ul>
					<li>Many URI schemes are named after protocols</li>
					<ul>
						<li><uri>http:</uri> can be accessed using the <em>Hypertext Transfer Protocol (HTTP)</em></li>
						<li><uri>ftp:</uri> can be accessed using the <em>File Transfer Protocol (FTP)</em></li>
						<li><uri>mailto:</uri> sends electronic mail using the <em>Simple Mail Transfer Protocol (SMTP)</em></li>
					</ul>
					<li>Some URI schemes do not really imply a protocol</li>
					<ul>
						<li><uri>mailto:</uri> sends electronic mail using the <em>Simple Mail Transfer Protocol (SMTP)</em></li>
						<li><uri>mailto:</uri> may use any other appropriate technology for sending email</li>
						<li><a href="http://dret.typepad.com/dretblog/2008/06/web-based-sms.html">instead of using protocols directly, they can be accessed indirectly through services</a></li>
					</ul>
					<li>Some URI schemes have no protocol for dereferencing resources</li>
					<ul>
						<li><uri>urn:</uri> URIs are abstract names from some namespace</li>
						<li><uri>urn:ietf:rfc:2648</uri> identifies an IETF standard and not some specific copy</li>
						<li><uri href="http://maps.google.com/maps?ll=27.988056,86.925278&amp;spn=0.1,0.1&amp;q=27.988056,86.925278+(Mount_Everest)">geo:27.988056,86.925278</uri> identifies a physical resource (accessing it is really hard)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Data Formats</title>
				<ul>
					<li>Agreement on the interpretation of resource representations</li>
					<li>HTML was the first standardized data format on the Web</li>
					<ul>
						<li>CSS and XML have become successful formats as well</li>
					</ul>
					<li>Some data formats are <em>de-facto standards</em> as Web formats</li>
					<ul>
						<li>GIF and JPEG for images and PNG as the successor of GIF</li>
					</ul>
					<li>Some formats are less integrated but still widely used</li>
					<ul>
						<li>PDF for paginated documents</li>
					</ul>
					<li>Some formats have become replacements for missing standards</li>
					<ul>
						<li>Flash for audio and video because no single format was sufficiently successful</li>
					</ul>
					<li>Some formats were intended to become standards but failed</li>
					<ul>
						<li>SVG for vector graphics</li>
						<li>SMIL for multimedia presentations</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Identifier, Resource, and Representation</title>
				<img style="height : 75% ; margin : 2% ; " src="uri-res-rep.png" href="http://www.w3.org/TR/webarch/#p21"/>
			</slide>
		</part>
		<part>
			<title>Constraints and Good Practices</title>
			<slide>
				<title>Constraints</title>
				<ul>
					<li>Some things on the Web can be inconsistent</li>
					<ul>
						<li>guaranteeing consistency by design can lead to tight coupling</li>
						<li>well-defined ways of handling inconsistencies scale better</li>
					</ul>
					<li>Some things on the Web are not perfect</li>
					<ul>
						<li>technologies being used in ways not anticipated (XML, XML Namespaces)</li>
						<li>company goals vs. the greater good (<a href="http://en.wikipedia.org/wiki/Browser_wars">browser war</a>)</li>
					</ul>
					<li><q>The ideal Web</q> vs. <q>the real Web</q></li>
					<ul>
						<li>dealing with a given landscape can introduce additional constraints</li>
						<li>handling these constraints should not violate the general principles</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Good Practices</title>
				<ul>
					<li>Design for openness and extensibility is a key factor</li>
					<ul>
						<li>design for and support evolution and extension and reuse</li>
						<li>try to be a good Web citizen by embracing integration</li>
					</ul>
					<li>Design with the Web in mind</li>
					<ul>
						<li>use Web standards where appropriate (URIs for identification)</li>
						<li>even intranet applications typically evolve and should be designed for the Web</li>
					</ul>
					<li>Make content visible, accessible, usable, reusable</li>
					<ul>
						<li>URI design guidelines should be defined and followed</li>
						<li>think about aggregation and granularity and access to resources</li>
						<li>use well-defined and well-documented XML for B2B scenarios</li>
						<li>reuse existing vocabularies or vocabulary parts whenever possible</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="dretblog">
			<title>Blogs as Web Applications</title>
			<slide>
				<title>Blog in XML</title>
				<listing src="dretblog.xml"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Year Index)</title>
				<listing src="dretblog2html.xsl" line="43-56"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Month Index)</title>
				<listing src="dretblog2html.xsl" line="57-73"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Day Index)</title>
				<listing src="dretblog2html.xsl" line="74-92"/>
			</slide>
			<slide>
				<title>Support Spontaneous Navigation</title>
				<listing src="dretblog2html.xsl" line="26-39"/>
			</slide>
			<slide>
				<title>Publishing as Atom Feed</title>
				<listing src="dretblog2atom.xsl" line="4-27"/>
			</slide>
			<slide>
				<title>Blog as Atom Feed</title>
				<listing src="dretblog.atom" line="2-26"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Web Architecture Essentials</title>
				<ul>
					<li>Principles (violating these causes architectural problems)</li>
					<li>Constraints (disregarding these causes technical problems)</li>
					<li>Good Practices (ignoring these causes user problems)</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="i18n+l10n">
        <title short="I18N &amp; L10N">Internationalization (I18N) &amp; Localization (L10N)</title>
        <date>2009-12-01</date>
        <toc class="reading"><a href="http://www.w3.org/TR/itsreq/" title='W3C&apos;s "Internationalization and Localization Markup Requirements"'>I18N &amp; L10N Markup</a></toc>
        <toc class="resources"><a href="http://unicode.org/" title="Unicode Web Site">Unicode</a>&#160;· <a href="http://homepages.cwi.nl/~dik/english/codes/stand.html" title="History of Character Sets">History</a>&#160;· <a href="http://www.w3.org/TR/i18n-html-tech-lang/" title='W3C&apos;s "Specifying Language in XHTML &amp; HTML Content"'>Content</a>&#160;· <a href="http://www.w3.org/TR/its/" title='W3C&apos;s "Internationalization Tag Set (ITS) Version 1.0"'>Tag Set</a></toc>
        <toc class="abstract">Many publishing environments need to support multiple languages. In many cases, the requirement to support multiple languages surfaces in later stages of a product development or publishing solution, which can cause major design changes, driving up costs. <em>Internationalization (I18N)</em> is the approach to design systems which can adapt to different locales. <em>Localization (L10N)</em> is the activity to identify, define, and encode locales, based on internationalized software. For languages using different alphabets, <em>Unicode</em> is the most popular character set today and provides a variety of encoding schemes, each of them being a <em>Unicode Transformation Format (UTF)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
        <part id="characters">
			<title>Characters</title>
			<slide>
				<title>Characters and Computers</title>
				<ul>
					<li><em>American Standard Code for Information Interchange (ASCII)</em></li>
					<ul>
						<li>for the first time a basic set of characters had a universally accepted encoding</li>
						<li>many Internet protocols (such as <a href="../services-fall06/web1#(12)">HTTP</a>) encode their information in ASCII commands</li>
					</ul>
					<li>ASCII is a very limited repertoire of characters</li>
					<ul>
						<li>basic ASCII contains 128 characters (7 bit) with a number of control chars</li>
						<li>no variants of characters (German umlauts, French accents) are supported</li>
						<li>various code pages extending ASCII to 8 bit exist and are hard to distinguish</li>
					</ul>
					<li><em>Character</em> is not a trivial concept when regarded globally</li>
					<ul>
						<li>European languages all have writing systems based on a small number of <q>atoms</q></li>
						<li>other languages and writing systems have vastly different ideas of <q>language atoms</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Characters</title>
				<blockquote>Character. (1) The smallest component of written language that has semantic value; refers to the abstract meaning and/or shape […]</blockquote>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/unicode4"><em>The Unicode Standard, Version 4.0</em>, Addison-Wesley, 2003</a></p>
				<ul>
					<li>The alphabetic approach is only one of several possibilities</li>
					<ul>
						<li>A character in <em>Japanese hiragana and katakana scripts</em> corresponds to a syllable (usually a combination of consonant plus vowel)</li>
						<li><em>Korean Hangul</em> combines symbols for individual sounds of the language into square blocks, each of which represents a syllable; depending on the user and the application, either the individual symbols or the syllabic clusters can be considered to be characters</li>
						<li>In <em>Indic scripts</em> each consonant letter carries an inherent vowel that is eliminated or replaced using semi-regular or irregular ways to combine consonants and vowels into clusters; depending on the user and the application, either individual consonants or vowels, or the consonant or consonant-vowel clusters can be perceived as characters</li>
						<li><em>Arabic and Hebrew vowel sounds</em> are typically not written at all; when they are written they are indicated by the use of combining marks placed above and below the consonantal letters</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Glyphs</title>
				<blockquote>[A Glyph is] a recognizable abstract graphic symbol which is independent of a specific design.</blockquote>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/iso9541"><em>ISO/IEC 9541:1991, Information Technology – Font Information Interchange</em></a></p>
				<ul>
					<li><em>Visual rendering</em> introduces the notion of a glyph.</li>
					<li>There is <em>not</em> a one-to-one correspondence between characters and glyphs</li>
					<ul>
						<li>A single character can be represented by multiple glyphs (each glyph is then part of the representation of that character); these glyphs may be physically separated from one another</li>
						<li>A single glyph may represent a sequence of characters (this is the case with ligatures, among others)</li>
						<li>A character may be rendered with very different glyphs depending on the context</li>
						<li>A single glyph may represent different characters (e.g. capital Latin A, capital Greek A and capital Cyrillic A)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="charactersets">
			<title>Character Sets</title>
			<slide>
				<title>History of Character Sets</title>
				<ul>
					<li>Text documents need ways to represent characters</li>
					<ul>
						<li>computers handle bits, not characters</li>
						<li>to handle characters, computers need a mapping from characters to bits</li>
					</ul>
					<li>For a long time, computers were doing their work in a very isolated way</li>
					<ul>
						<li><q>I think there is a world market for maybe five computers.</q> (<a href="http://en.wikipedia.org/wiki/Thomas_J._Watson#Famous_misquote">¬ T. J. Watson</a>)</li>
					</ul>
					<li>With more computers being used, more data is exchanged between computers</li>
					<li><em>Data rot</em> happens on all levels (media, formats, applications)</li>
					<li>Standardization of character sets started in the 60's</li>
					<ul>
						<li>ASCII was the first generally accepted character set</li>
						<li>EBCDIC was invented and marketed by IBM (and is a terribly designed character encoding)</li>
						<li>ISO 8859 was the first attempt to better support character sets beyond ASCII</li>
						<li><em>Asian scripts</em> were always a problem because of the number of characters they need</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>ASCII 1963</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1963.gif" title="ASCII 1963"/>
			</slide>
			<slide>
				<title>ASCII 1965</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1965.gif" title="ASCII 1965"/>
			</slide>
			<slide>
				<title>ASCII 1967</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1967.gif" title="ASCII 1967"/>
			</slide>
			<slide>
				<title>Beyond ASCII</title>
				<ul>
					<li>ASCII is called ASCII for a reason</li>
					<ul>
						<li>it works well for English-speaking countries</li>
						<li>the majority of other languages cannot be represented</li>
					</ul>
					<li>Character sets and the 8 bit computer start to collide</li>
					<ul>
						<li>ASCII is very convenient because characters and bytes correspond 1:1</li>
						<li>every character set expanding ASCII will make this more complicated</li>
						<li>complications can occur within and/or outside of the character set</li>
					</ul>
					<li>Introducing a character set beyond 8 bit is a fundamental change</li>
					<ul>
						<li>dealing with and counting bytes is a seductively simple idea</li>
					</ul>
					<li>Introducing several 8 bit character sets saves the 8 bit world</li>
					<ul>
						<li>by introducing several character sets, each of them can remain 8 bit</li>
						<li>the complexity has now been shifted to the handling of various character sets</li>
					</ul>
				</ul>
			</slide>
			<slide id="iso8859">
				<title>ISO 8859</title>
				<ul>
					<li>A <em>family of character sets</em> rather than a single character set</li>
					<ul>
						<li>each ISO 8859 family member is an 8 bit character set (256 characters)</li>
						<li>the lower half (128 characters) are always the same (ASCII)</li>
						<li>the upper half is supporting different user groups and changes between versions</li>
					</ul>
					<li>ISO 8859 files cannot be identified by inspection</li>
					<ul>
						<li>ASCII characters can always be safely interpreted (identical on all ISO 8859 code pages)</li>
						<li>the upper half can only be interpreted if the code page is well-known</li>
					</ul>
					<li>ISO 8859 environments must carefully track the code pages being used</li>
					<ul>
						<li>failure to do so results in misinterpretation of characters</li>
					</ul>
					<listing src="iso8859-15.txt" encoding="ISO-8859-15" line="3-3" title='iso8859-15.txt included with encoding="ISO-8859-15"'/>
					<listing src="iso8859-15.txt" encoding="ISO-8859-1" line="3-3" title='iso8859-15.txt included with encoding="ISO-8859-1"'/>
				</ul>
			</slide>
			<slide>
				<title>ISO 8859-1 (Latin-1) &amp; ISO 8859-2 (Latin-2)</title>
				<table width="95%">
					<tr>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="iso-8859-1.gif" title="ISO 8859-1 (Latin-1)"/>
							<br/>
							<p style="text-align : center">Latin-1 (Western European)</p>
						</td>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="iso-8859-2.gif" title="ISO 8859-2 (Latin-2)"/>
							<br/>
							<p style="text-align : center">Latin-2 (Central European)</p>
						</td>
					</tr>
				</table>
			</slide>
			<slide>
				<title>ISO 8859-7 (Greek) &amp; ISO 8859-15 (Latin-9)</title>
				<table width="95%">
					<tr>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="iso-8859-7.gif" title="ISO 8859-7 (Greek)"/>
							<br/>
							<p style="text-align : center">Greek</p>
						</td>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="iso-8859-15.gif" title="ISO 8859-15 (Latin-9)"/>
							<br/>
							<p style="text-align : center">Latin-9</p>
						</td>
					</tr>
				</table>
			</slide>
		</part>
		<part id="unicode-basics">
			<title>Unicode Basics</title>
			<slide>
				<title>ISO 8859 Problems</title>
				<ul>
					<li>One document can only contain characters from one character set</li>
					<ul>
						<li>mixing characters from different sets is impossible</li>
					</ul>
					<listing src="currency-euro.txt"/>
					<li>An increasing number of character sets does not make life easier</li>
					<ul>
						<li>in particular, if they sometimes differ only slightly (e.g., Latin-1 vs. Latin-9)</li>
					</ul>
					<li>For bigger character sets, the 8 bit approach is not working at all</li>
					<ul>
						<li>the ISO 8859 approach allows only 128 special characters (the lower half is ASCII)</li>
					</ul>
					<li>ISO 8859 is as good as it gets with 8 bit</li>
					<ul>
						<li>to improve this approach, the 8 bit philosophy must be abandoned</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Unicode</title>
				<ul>
					<li>Generalization takes characters beyond one and even two bytes</li>
					<ul>
						<li>Unicode has been designed to cover all characters of the world</li>
						<li>Unicode recently added its 100,000<sup>th</sup> character</li>
						<li>for handling this character set, a more structured approach is required</li>
					</ul>
					<li>Unicode cleanly separates various conceptual steps</li>
					<ul>
						<li>characters are collected and are then part of the <em>character repertoire</em></li>
						<li>characters are then identified by a unique <em>code point</em> (written as <code>U+0041</code>)</li>
						<li>a <em>Character Encoding Scheme (CES)</em> then maps the <em>Coded Character Set (CCS)</em> based on a <em>Character Encoding Form (CEF)</em></li>
					</ul>
					<li><q><a href="../xml-fall09/basics#(6)">XML is ASCII for the 21<sup>st</sup> century</a></q></li>
					<ul>
						<li>purists sometimes consider Unicode too big or dangerous</li>
						<li>Unicode is well-established and is necessary in a globalized economy</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Unicode Character Count</title>
				<ul>
					<li>Unicode has the ability to encode 17 × 2<sup>16</sup> = 1'114'112 characters</li>
					<ul>
						<li>this means that currently ~10% of the available space is used</li>
					</ul>
					<li>Characters are organized into 17 <q>planes</q> of 2<sup>16</sup> = 65'536 characters</li>
					<li>Planes are numbered from <q>0</q> to <q>16</q></li>
					<li>Plane 0 is the <em>Basic Multilingual Plane (BMP)</em></li>
					<ul>
						<li>all characters which are in practical use today are part of the BMP (well, <a href="http://www.tlg.uci.edu/~opoudjis/unicode/unicode_astral.html">almost …</a>)</li>
					</ul>
					<li>Planes beyond the BMP contain rare and historic characters</li>
					<ul>
						<li><q><a href="http://unicode.org/charts/PDF/U10300.pdf">Old Italic</a></q>, <q><a href="http://unicode.org/charts/PDF/U10400.pdf">Deseret</a></q>, <q><a href="http://unicode.org/charts/PDF/U1D000.pdf">Byzantine Musical Symbols</a></q></li>
						<li>most space within these <q>astral planes</q> is empty</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Unicode Encodings</title>
				<table style="margin : 5% ; width : 85% ; "> 
					<tr><th/><th>A</th><th>א</th><th>好</th><th><img src="U+233B4.gif" style="height : 1em ; "/></th></tr> 
					<tr><th>Code point</th><td>U+0041</td><td>U+05D0</td><td>U+597D</td><td>U+233B4</td></tr> 
					<tr><th>UTF-8</th><td>41</td><td>D7 90</td><td>E5 A5 BD</td><td>F0 A3 8E B4</td></tr> 
					<tr><th>UTF-16</th><td>00 41</td><td>05 D0</td><td>59 7D</td><td>D8 4C DF B4</td></tr> 
					<tr><th>UTF-32</th><td>00 00 00 41</td><td>00 00 05 D0</td><td>00 00 59 7D</td><td>00 02 33 B4</td></tr> 
				</table>
			</slide>
			<slide>
				<title>UTF-8</title>
				<ul>
					<li>UTF-8 is one of the two standardized encodings for XML</li>
					<ul>
						<li>every ASCII document by definition is a UTF-8 document</li>
						<li>UTF-8 must be supported by every XML implementation</li>
					</ul>
					<li>UTF-8 is not trivial, but it is widely supported and easy to implement</li>
					<ul>
						<li>there is no 1:1 correspondence between bytes and characters</li>
						<li>each Unicode character is encoded by 1-4 bytes</li>
						<li>UTF-8 is good for europeans (1 byte per ASCII character)</li>
					</ul>
					<li>Non-Unicode documents must be <em>transcoded</em> to UTF-8</li>
					<ul>
						<li>keeping track of resource character encodings is a good idea</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Other UTFs</title>
				<ul>
					<li>UTF-16 stores every character as 2 or more bytes</li>
					<ul>
						<li>BMP characters are stored as 2 bytes</li>
						<li>astral plane characters are stored as 4 bytes</li>
						<li>UTF-16 is the other encoding (in addition to UTF-8) required by XML</li>
					</ul>
					<li>UTF-32 stores every character as 4 bytes</li>
					<ul>
						<li>very simple and very inefficient (requires four times more space than ASCII)</li>
					</ul>
					<li>Multi-byte formats introduce the problem of <em>byte order</em></li>
					<ul>
						<li>UTF-16/32BE and UTF-16/32LE are stored with guaranteed endianness</li>
						<li>UTF-16/32 may use a <em>Byte Order Mark (BOM)</em> (U+FEFF) to detect the endianness</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Character Set Identification</title>
				<ul>
					<li>HTTP, XML, and HTML support character set identification</li>
					<ul>
						<li><a href="../services-fall06/web1#(12)">HTTP</a> supports the <code>Content-Type</code> <a href="../services-fall06/web1#(17)">header field</a></li>
						<pre>Content-Type: text/html; charset=utf-8</pre>
						<li>XML encodes the character set <a href="../xml-fall09/basics#(8)">in the XML declaration</a></li>
						<pre>&lt;?xml version="1.0" encoding="utf-8"?></pre>
						<li>HTML supports the <elem>meta</elem> element in the document's <elem>head</elem></li>
						<pre>&lt;meta http-equiv="Content-Type" content="text/html;charset=utf-8"></pre>
					</ul>
					<li>Different identifications serve different purposes</li>
					<li><em>Conflicting identifications</em> are a sign of management problems</li>
				</ul>
			</slide>
			<slide id="unicode-normalization">
				<title>Unicode is Complex</title>
				<ul>
					<li>Many languages have characters which are composed</li>
					<ul>
						<li>german umlauts are vowels with a double dot</li>
						<li>french accents and the cedilla are also added to <q>regular</q> characters</li>
					</ul>
					<li>Unicode contains composed as well as composing characters</li>
					<ul>
						<li>for most languages, composed characters are considered to be regular characters</li>
						<li>in some circumstances, it might be required to compose a character out of a base and a <em>diacritical mark</em></li>
						<li>as a result, the question arises how to define the equality of these variants</li>
					</ul>
					<li>Unicode defines <em>normal forms</em> which prescribe one variant</li>
					<ul>
						<li>a complex field with different concepts of equivalence (<em>canonical</em> and <em>compatibility</em>)</li>
						<li>based on the equivalence forms, there are four <a href="http://www.unicode.org/reports/tr15/">normalization forms</a></li>
					</ul>
				</ul>
				<listing src="francais.xml" line="2-2" title="Unnormalized Unicode"/>
				<listing src="francais.xml" line="2-2" encoding="ISO-8859-1" title='Unnormalized UTF-8 with encoding="ISO-8859-1"'/>
			</slide>
			<slide id="unicode-transcoding">
				<title>Transcoding</title>
				<ul>
					<li>Handling Unicode in an unconstrained environment is not easy</li>
					<ul>
						<li>Unicode's size and variability make character processing harder than it used to be</li>
						<li>for some applications with limited character need, this might be too much</li>
					</ul>
					<li>In all text-based environments, well-defined rules must be defined for:</li>
					<ol>
						<li>the encoding of documents (maybe with variants, such as Unicode normalization forms)</li>
						<li>accepted incoming encodings and how they are mapped to the internal encoding</li>
						<li>available outgoing encodings and how they can be requested and will be generated</li>
					</ol>
					<li><em>Transcoding</em> is the activity of changing the encoding of data</li>
					<ul>
						<li>ideally, transcoding should be lossless and round-trip proof</li>
						<li>in any scenario with non-trivial encodings, this is not an easy goal</li>
						<li>even staying with one encoding can be a problem (if there are variations allowed)</li>
					</ul>
					<li>Transcoding is essential for maintaining data quality</li>
				</ul>
			</slide>
        </part>
        <part id="i18n">
			<title short="I18N">Internationalization (I18N)</title>
			<slide>
				<title>What is Language?</title>
				<ul>
					<li>Language is more than the encoding of individual words</li>
					<ul>
						<li>languages and culture are deeply intertwined and inseparable</li>
						<li>ideally, systems/solutions should adapt to culture, not only to language</li>
						<li>on a superficial level, adapting to language is a useful first step</li>
					</ul>
					<li>Languages have properties which are beyond character sequences</li>
					<ul>
						<li>for right-to-left languages, screen layout should be done right-to-left</li>
						<li>if languages are mixed, one language has to be the preferred language</li>
					</ul>
					<li>Language identification and selection are basic I18N tasks</li>
					<ul>
						<li>the <q>one language fits all</q> assumption is becoming increasingly inappropriate</li>
						<li>the <q>just switch the labels</q> strategy also may be too little for true L10N</li>
					</ul>
				</ul>
			</slide>
			<slide id="beyond-language">
				<title>Beyond Language</title>
				<img src="uighur.png" style="float : right ; margin : 1em ; " href="http://www.linguamongolia.co.uk/script1.html" title='"Mongol" in Uighur Script'/>
				<ul>
					<li>Directionality is the concept of how written language is organized</li>
					<ul>
						<li>early languages had no inherent directionality (even zigzag writing was possible)</li>
						<li>the majority of today's languages are <em>left-to-right</em> languages</li>
						<li>Arabic and Hebrew are two popular <em>right-to-left</em> languages</li>
						<li>Chinese can be written <em>top-to-bottom</em> and <em>right-to-left</em></li>
						<li>Mongolian (using the <a href="http://www.linguamongolia.co.uk/">Uighur alphabet</a>) is written <em>top-to-bottom</em> and <em>left-to-right</em></li>
					</ul>
					<li>Icons can be very culture-specific and often need to be localized as well</li>
					<img src="owl.png" style="float : left ; width : 20% ; " title="Wisdom or Witchcraft?"/>
					<ul>
						<li>icons are pictorial metaphors (rooted in language and/or culture)</li>
						<li>language-specific metaphors may not work across languages (e.g., <em>OK gesture</em>)</li>
						<li>culture-specific metaphors may not work across cultures (e.g., <em>owls</em>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Definition</title>
				<blockquote>Internationalization is the design and development of a product, application or document content that enables easy localization for target audiences that vary in culture, region, or language.</blockquote>
				<ul>
					<li>I18N starts at the design phase and influences the complete process</li>
					<li>The upfront costs of I18N are considerable (increased complexity)</li>
					<li>The costs of retroactive I18N are much higher than that</li>
				</ul>
			</slide>
			<slide>
				<title>I18N Tasks</title>
				<ol>
					<li>UI elements (windows, menus) must be modified to accept translated text</li>
					<li>Static text must be made configurable</li>
					<li>Icons and graphics must be changed to be more culturally appropriate</li>
					<li>Sound files that contain spoken language must be re-recorded</li>
					<li>Online help must be translated</li>
					<li>Dynamic text (dates, times) must be formatted using the locale</li>
					<li>Text handling code must calculate word breaks using the locale</li>
					<li>Tabular data must be sortable using the locale</li>
				</ol>
			</slide>
		</part>
        <part id="l10n">
			<title short="L10N">Localization (L10N)</title>
			<slide>
				<title>Definition</title>
				<blockquote>Localization refers to the adaptation of a product, application or document content to meet the language, cultural and other requirements of a specific target market (a <q>locale</q>).</blockquote>
				<ul>
					<li>Without proper I18N, L10N is a very expensive and risky process</li>
					<li>L10N based on internationalized products should be straight-forward</li>
					<li>During L10N for specific locales, new areas for I18N might be identified</li>
					<ul>
						<li>some non-internationalized part of the product is identified as inappropriate</li>
					</ul>
					<li>L10N should be regarded as an input for improved I18N</li>
				</ul>
			</slide>
			<slide>
				<title>L10N Tasks</title>
				<ol>
					<li>Create translations for all interface elements</li>
					<li>Translate all static texts</li>
					<li>If necessary, create localized icons and graphics</li>
					<li>Any spoken text must be recorded in the target language</li>
					<li>Make sure that the localized product uses the localized online help</li>
					<li>Formatting of data types must be treated locale-specific</li>
					<li>If necessary, dictionaries and other language tools must be integrated</li>
					<li>Sorting functions in the code must respect the locale</li>
				</ol>
			</slide>
        </part>
		<part id="xml:lang">
			<title>Language Identification in Resources</title>
			<slide>
				<title>Language Codes</title>
				<ul>
					<li>Language identification is required in many contexts</li>
					<ul>
						<li>successful identification needs a standardized set of <em>language tags</em></li>
					</ul>
					<li><a href="http://dret.net/rfc-index/reference/RFC4646" title="Language Tag RFC">RFC 4646</a> defines <q>Tags for Identifying Languages</q></li>
					<ul>
						<li>two letter codes such as <code>en</code> are interpreted according to <a href="http://dret.net/biblio/reference/iso639-1">ISO 639-1</a></li>
						<li>three letter codes such as <code>eng</code> are interpreted according to <a href="http://dret.net/biblio/reference/iso639-2">ISO 639-2</a></li>
						<li><q><code>x-</code></q> indicates a value which is not standardized (requires mutual agreement)</li>
						<li><em>subtags</em> such as <code>en-US</code> specify additional properties (regions, dialects, scripts, …)</li>
					</ul>
					<li><a href="http://dret.net/rfc-index/reference/RFC4647" title="Language Tag Matching RFC">RFC 4647</a> defines the <q>Matching of Language Tags</q></li>
					<ul>
						<li>matchmaking between requested and available languages</li>
						<li>how to proceed if <code>en-US</code> or <code>de</code> are requested but available variants are <code>en</code> and <code>de</code></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>ISO 639-2 Code List</title>
				<listing src="ISO-639-2_values_8bits.txt" href="http://www.loc.gov/standards/iso639-2/ISO-639-2_values_8bits.txt" line="115-136" title="ISO 639-2 Code List"/>
			</slide>
			<slide>
				<title>IANA Language Subtag Registry</title>
				<listing src="language-subtag-registry.txt" href="http://www.iana.org/assignments/language-subtag-registry" line="4411-4431" title="IANA Language Subtag Registry"/>
			</slide>
        </part>
		<part id="i18n-uri">
			<title>URIs for Multilingual Resources</title>
			<slide>
				<title>Naming Language Variants</title>
				<ul>
					<li>URIs are intended to be names for resources (not for representations)</li>
					<ul>
						<li>when does a representation become an individual resource?</li>
						<li>the distinction is a gradual one (and it also applies to versioning over time)</li>
						<li>if I send you a bookmark of a german page, should you get an english variant?</li>
					</ul>
					<li>Advantages when language variants are different resources</li>
					<ul>
						<li>language variants can be identified (e.g., bookmarked) reliably</li>
					</ul>
					<li>Advantages when language variants are the same resource</li>
					<ul>
						<li>when accessing the resource, a more appropriate variant can be negotiated dynamically</li>
					</ul>
					<li>Ideally, language variants should use a mix of both approaches</li>
					<ul>
						<li>have their own URI so that they can be bookmarked and handled individually</li>
						<li>remain identifiable as a variant of the more generic <q>language-independent resource</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Variant Naming Variations</title>
				<ul>
					<li>URIs can be used in a number of different ways</li>
					<ul>
						<li>any Web-based publishing should have a strategy for using URIs</li>
						<li>being consistent is almost as important as doing it right</li>
					</ul>
					<li>For variations of URIs, various parts of the Web architecture can be used</li>
					<ul>
						<li>DNS names for naming servers</li>
						<li>various parts of the URI name</li>
						<li>protocol mechanisms of HTTP or <link href="cookies"/></li>
						<li><a href="http://h3h.net/2007/01/designing-urls-for-multilingual-web-sites/">Designing URLs for Multilingual Web Sites</a> lists a number of possible variations</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-dns-domains">
				<title>DNS Domains</title>
				<pre>http://<span style="color : red">en.</span>example.com/some/page</pre>
				<ul>
					<li>Defines DNS subdomains for all supported languages</li>
					<li>Advantages</li>
					<ul>
						<li>offers easy load-balancing to different servers (but code must be kept in sync)</li>
						<li>bookmarks identify the language variant</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>no easy way to get from one variant to another in terms of <q>URI navigation</q></li>
						<li>language management requires DNS updates</li>
						<li>DNS names have not been designed for this kind of usage</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-path-segment">
				<title>Constructed Paths</title>
				<pre>http://example.com/<span style="color : red">en/</span>some/page</pre>
				<ul>
					<li>Encodes the language as the first path segment of the URI path</li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>logically, the URI path does not represent the hierarchy of resources</li>
						<li>no easy way to get from one variant to another in terms of <q>URI navigation</q></li>
						<li>hard to maintain if this is used as the actual layout of documents in directories</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-query-string">
				<title>Query Strings</title>
				<pre>http://example.com/some/page<span style="color : red">?lang=en</span></pre>
				<ul>
					<li>Use a query string to get the resource in the required language variant</li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
						<li><code>http://example.com/some/page</code> is usable for the abstract resource</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>does not allow proper caching of pages</li>
						<li>search engines ignore query strings and thus cannot link to the variants</li>
						<li>against the idea of query strings which are meant for dynamic content</li>
						<li>hard to combine with other query string information (if required)</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-dns-tld">
				<title>DNS TLDs</title>
				<pre>http://example<span style="color : red">.us</span>/some/page</pre>
				<ul>
					<li>Use DNS TLDs to identify the supported languages</li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>requires registration of many TLDs (effort, costs, domain squatting)</li>
						<li>countries do not identify languages (requires additional mechanism)</li>
						<li>because of the country/language complexity, cross-language links are hard to maintain</li>
						<li>DNS names have not been designed for this kind of usage</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-cookie">
				<title>Cookies</title>
				<pre>http://example.com/some/page</pre>
				<ul>
					<li>Store the cookie with the language preference and use the cookie setting</li>
					<li>Advantages</li>
					<ul>
						<li>language is not part of the URI and thus the URI identifies the resource</li>
						<li>bookmarks (and any intra-page links) will automatically yield the preferred language variant</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>does not work if <link href="cookies"/> are disabled or intercepted</li>
						<li>it is not possible to create a bookmark for the language variant</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-http">
				<title>Content Negotiation</title>
				<pre>http://example.com/some/page</pre>
				<ul>
					<li>Use browser settings and <link href="http-conneg"/> to get the variant</li>
					<li>Advantages</li>
					<ul>
						<li>language is not part of the URI and thus the URI identifies the resource</li>
						<li>bookmarks (and any intra-page links) will automatically yield the preferred language variant</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>does not work if the user agent does not support <link href="http-conneg"/></li>
						<li>does not work if the user has not configured the browser correctly</li>
						<li>switching between languages requires configuration of the browser</li>
						<li>it is not possible to create a bookmark for the language variant</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-extension">
				<title>Path Segment Name</title>
				<pre>http://example.com/some/page<span style="color : red">.en</span></pre>
				<ul>
					<li>Use <q><code>.</code></q> which uses the resource's <q>extension</q></li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
						<li>maps easily to extensions in file systems</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>does not mix well if additional properties (such as the format) are required</li>
						<li>no easy way to get from one variant to another in terms of <q>URI navigation</q></li>
						<li>not officially recognized as URI structure (just simplifies parsing and recognition)</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-comma">
				<title>URI Sub-Delimiter Comma</title>
				<pre>http://example.com/some/page<span style="color : red">,en</span></pre>
				<ul>
					<li>Use <q><code>,</code></q> for specifying a parameter to a URI <em>path segment</em></li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
						<li>makes the language identifiable as a special part of the URI</li>
						<li><code>http://example.com/some/page</code> is usable for the abstract resource</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>does not mix well with parameters for other resource dimensions</li>
						<li>not officially recognized as URI structure (just simplifies parsing and recognition)</li>
					</ul>
				</ul>
			</slide>
			<slide id="lang-variant-semicolon">
				<title>URI Sub-Delimiter Semicolon</title>
				<pre>http://example.com/some/page<span style="color : red">;lang=en</span></pre>
				<ul>
					<li>Use <q><code>;</code></q> for specifying a parameter to a URI <em>path segment</em></li>
					<li>Advantages</li>
					<ul>
						<li>bookmarks identify the language variant</li>
						<li>makes the language identifiable as a special part of the URI</li>
						<li>can be combined with parameters for other resource dimensions</li>
						<li><code>http://example.com/some/page</code> is usable for the abstract resource</li>
					</ul>
					<li>Disadvantages</li>
					<ul>
						<li>not officially recognized as URI structure (just simplifies parsing and recognition)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Now What?</title>
				<ul>
					<li>There is no perfect solution for naming language variants</li>
					<ul>
						<li>the Web architecture does not provide support for <q>variant preference</q> in a URI</li>
						<li>there is a strong separation between static URIs and dynamic content negotiation</li>
					</ul>
					<li>The name-based solutions (using <q><code>.</code></q>, <q><code>;</code></q>, and <q><code>,</code></q>) are very similar</li>
					<ul>
						<li>any difference between them is how they are used in practice</li>
						<li>from the URI point of view, path segments containing <q><code>.</code></q>, <q><code>;</code></q>, and <q><code>,</code></q> are not treated in any special way</li>
						<li>the only officially recognized special path segments are <q><code>.</code></q> and <q><code>..</code></q></li>
					</ul>
					<li>Theoretically, HTTP could define scheme-specific semantics</li>
					<ul>
						<li>for <code>http</code> URIs, <q><code>,</code></q> and/or <q><code>;</code></q> could interact with content negotiation</li>
						<li>while this is an interesting idea with many useful applications, it is very unlikely to happen</li>
					</ul>
					<li>Use <link href="lang-variant-comma"><q><code>,</code></q></link> or <link href="lang-variant-semicolon"><q><code>;</code></q></link> for language variants, but do not expect magic to happen</li>
				</ul>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Babelification</title>
				<ul>
					<li>I18N is a non-trivial task and requires some planning</li>
					<li>L10N involves much more than just translating text</li>
					<li>Producing fully localized products is a complex issue</li>
					<li>The <em>locale</em> is based on language and culture and customs</li>
					<li>Successful L10N requires the input of local people</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="trends">
        <title short="Trends">Web Trends</title>
        <date>2009-12-03</date>
        <toc class="abstract">Web architecture in many cases simply lays the groundwork for developing application areas. In this final lecture we briefly look at some of the current trends on the Web, and how they connect to Web architecture. While the drivers of the trends often are not exclusively technical, they often have a substantial background in technology as an enabler of applications. Not all of the technological issues are within the realm of Web architecture, but increasingly the Web ties together a lot of formerly disconnected application areas, and serves as an integration and unification platform.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Web Versioning</title>
			<img style="height : 65% ; margin : 4% ; " src="web20-web30.jpg" title="What's the difference between Web 2.0 and Web 3.0?" href="http://blogs.ubc.ca/dean/files/2009/02/web203011.jpg"/>
		</slide>
		<slide>
			<title>Past Developments</title>
			<ul>
				<li>Web 2.0</li>
				<li>Blogging and micro-blogging</li>
				<li>Social networks</li>
				<li>Web services</li>
			</ul>
		</slide>
		<part id="location">
			<title>The Location-Aware Web</title>
			<slide>
				<title>Location-Based Services</title>
				<img style="height : 65% ; margin : 4% ; " src="nokia-map.jpg"/>
			</slide>
			<slide>
				<title>Location and the Web</title>
				<ul>
					<li>The current Web is unaware of location as a concept</li>
					<ul>
						<li>it supports other concepts such as media types and languages</li>
						<li>many applications use location and expose location in the UI</li>
						<li><link href="mobile">mobile access</link> and location-oriented services become more widespread</li>
					</ul>
					<li>How to express location as a Web-level concept?</li>
					<ul>
						<li>should there be a <em>URI scheme for locations</em>?</li>
						<li>should there be <em>location support in HTTP</em>?</li>
						<li>should there be <em>location metadata in HTML</em>?</li>
						<li>should there be <em>location access in Web Apps</em>? (<a href="http://www.w3.org/TR/geolocation-API/">there is</a>)</li>
						<li>should there be <em>location concepts in RDF</em>?</li>
					</ul>
					<li>Thinking beyond <q>location-oriented applications</q></li>
					<ul>
						<li>applications typically are closed verticals</li>
						<li>mashability and repurposing require exposed application models</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="mobile">
			<title>The Mobile Web</title>
			<slide>
				<title>Mobile Phones</title>
				<img style="height : 65% ; margin : 4% ; " src="phones.jpg"/>
			</slide>
			<slide>
				<title>Native vs. Web</title>
				<ul>
					<li>iPhone switched from Web-based to native</li>
					<ul>
						<li>iPhone OS 2.0 introduced native apps and the <em href="http://www.apple.com/iphone/appstore/">App Store</em></li>
						<li>the native app model has proven to be the killer feature of the iPhone</li>
						<li>other mobile platforms try to replicate the model</li>
					</ul>
					<li><q>In-App Purchases</q> allow apps to use micropayments</li>
					<ul>
						<li>it could be <a href="http://dret.typepad.com/dretblog/2009/03/the-rise-of-the-micropayments.html">The Rise of The Micropayments</a></li>
						<li>so far it has not seen a lot of widespread use</li>
					</ul>
					<li>Phones will have increasingly powerful browsers</li>
					<ul>
						<li>phones might also have <em>only browsers</em> (ChromeOS)</li>
						<li><link href="apps"/> might be a better long-term strategy</li>
					</ul>
					<li>Currently too many players on the mobile OS market</li>
					<ul>
						<li>consolidation is inevitable</li>
						<li>the Web is moving to <link href="apps"/>, why not the mobile Web as well?</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="apps">
			<title>Web Apps</title>
			<slide>
				<title>HTML5 for Advanced Web Apps</title>
				<img style="height : 65% ; margin : 4% ; " src="html5.gif" title="HTML5"/>
			</slide>
			<slide>
				<title>The CSS3 Effect?</title>
				<ul>
					<li>HTML5 itself defines the language and the DOM</li>
					<li>Web Apps need additional functionality in terms of APIs</li>
					<ul>
						<li>push services: <a href="http://www.w3.org/TR/2009/WD-eventsource-20091029">Server-Sent Events</a></li>
						<li>TCP over HTTP over TCP: <a href="http://www.w3.org/TR/2009/WD-websockets-20091029">Web Sockets</a></li>
						<li>key/value database: <a href="http://www.w3.org/TR/2009/WD-webstorage-20091029">Web Storage</a> or <a href="http://www.w3.org/TR/2009/WD-WebSimpleDB-20090929">WebSimpleDB</a></li>
						<li>SQL database: <a href="http://www.w3.org/TR/2009/WD-webdatabase-20091029">Web Database</a></li>
						<li>threads: <a href="http://www.w3.org/TR/2009/WD-workers-20091029">Web Workers</a></li>
						<li>file system access: <a href="http://www.w3.org/TR/2009/WD-FileAPI-20091117">File API</a></li>
					</ul>
					<li>Turning the browser into an OS</li>
					<ul>
						<li>the Java vision without the Java language</li>
						<li>where do you stop? (3D rendering, device access, introspection)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="semantics">
			<title>Web Semantics</title>
			<slide>
				<title>Understanding the Web</title>
				<img style="height : 65% ; margin : 4% ; " src="semantic-web.jpg"/>
			</slide>
			<slide>
				<title>Understanding Resources</title>
				<ul>
					<li>Finding needles in a haystack</li>
					<ul>
						<li>search engines are amazing but limited in their understanding</li>
						<li>more understanding of resources allows better resource access</li>
					</ul>
					<li>Semantics are always embedded into perspectives/models</li>
					<ul>
						<li>who has the right to understand/describe resources?</li>
						<li>how do I know how the resource was interpreted and described?</li>
						<li>how can I find alternative descriptions?</li>
						<li>where is the line between <q>data</q> and <q>information</q>/<q>knowledge</q>?</li>
					</ul>
					<li>The tough challenges are not on the technical level</li>
					<ul>
						<li><q>kid-friendly search engines</q></li>
						<li>browser-level or OS-level content filtering</li>
						<li>Internet-level or Web-level filtering</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="things">
			<title>Web of Things</title>
			<slide>
				<title>Connecting Real-World Objects</title>
				<img style="height : 65% ; margin : 4% ; " src="internet-of-things.jpg"/>
			</slide>
			<slide>
				<title>Internet vs. Web</title>
				<ul>
					<li><q>Internet of Things</q> is the current buzzphrase</li>
					<ul>
						<li>a useful and necessary first step (establish connectivity)</li>
						<li>not enough to enable ubiquitous accessibility and usability</li>
					</ul>
					<li>Apply Web architecture to the <em>Internet of Things</em></li>
					<ul>
						<li>expose <q>Things</q> via <link href="rest">REST</link>ful Web services</li>
						<li>from the Web's perspective, things are just resources</li>
					</ul>
					<li>Some new twists and challenges have to be solved</li>
					<ul>
						<li>how to find <q>things</q> (you cannot search for them with Google)</li>
						<li>how to find the interesting things (there are too many things)</li>
						<li>how to interact with them (is <link href="http">HTTP</link> sufficient as the uniform interface?)</li>
						<li>how to support <link href="realtime">real-time</link> interactions (push instead of pull)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="realtime">
			<title>The Real-Time Web</title>
			<slide>
				<title>Tracking in Real-Time</title>
				<img style="height : 65% ; margin : 4% ; " src="woopra.png" href="http://www.woopra.com/"/>
			</slide>
			<slide>
				<title>Pull vs. Push</title>
				<ul>
					<li>Pulling is the underlying principle of the current Web</li>
					<ul>
						<li>interactions are initiated by the client</li>
						<li>servers don't have to keep track of their clients</li>
					</ul>
					<li>Translating between the <em>Pull</em> and <em>Push</em> patterns</li>
					<ul>
						<li>translating push into pull is trivial: store the push, wait for the pull</li>
						<li>translating pull into push requires some optimization</li>
					</ul>
					<li>Pushing is faster but more expensive</li>
					<ul>
						<li>scalable pushing infrastructures can be built (iPhone push notifications)</li>
						<li>building it in a distributed way makes it more flexible and scalable</li>
					</ul>
					<li><a href="http://code.google.com/p/pubsubhubbub/">PubSubHubbub</a> builds a push model around feeds</li>
					<ul>
						<li>feeds declare that the content is also available through a hub</li>
						<li>hubs can be configured into a multi-level hierarchy</li>
						<li>clients connect to hubs and are notified by their hub</li>
					</ul>
				</ul>
			</slide>
		</part>
		<slide>
			<title>Conclusions</title>
			<ul>
				<li>Web Architecture as Foundation</li>
				<ul>
					<li>The Web has become an essential infrastructure</li>
					<li>Understanding the Web helps understanding services</li>
					<li>Web architecture and the Web will keep evolving</li>
					<li>Certain principles are surprisingly stable over time</li>
					<li>Don't spend too much time analyzing buzzphrases</li>
				</ul>
			</ul>
		</slide>
	</presentation>
 </hotspot>
