<?xml version="1.0" encoding="UTF-8"?>
<!-- $Id: xml-fall07.xml 755 2007-12-07 03:47:05Z dret $ -->
<?xslidy counter-separator=":&#160;" ?>
<?xslidy counter-format="full" ?>
<?xslidy extension-file="html" ?>
<?xslidy extension-link="" ?>
<?xslidy img-path="img" ?>
<?xslidy link-author="http://dret.net/netdret/" ?>
<?xslidy link-contents="./" ?>
<?xslidy link-glossary="http://dret.net/glossary/" ?>
<?xslidy link-home="./" ?>
<?xslidy listing-class="listing" ?>
<?xslidy listing-path="src" ?>
<?xslidy outline-class="outline" ?>
<?xslidy outline-title="Outline" ?>
<?xslidy outlink-mark="a" ?>
<?xslidy outlink-style="class(outlink)" ?>
<?xslidy part-slide-count="all" ?>
<?xslidy part-slide-text=" [*]" ?>
<?xslidy layout="ischool" ?>
<?xslidy xslidy-prefix="xslidy" ?>
<xslidy xmlns="http://dret.net/xmlns/xslidy/1" xmlns:xslidy="http://dret.net/xmlns/xslidy/1">
	<title short="XML Foundations"><a href="./" title="Course Homepage">XML Foundations</a> (INFO 242)</title>
	<author short="E. Wilde"><a href="http://dret.net/netdret/" title="dret.net">Erik Wilde</a></author>
	<affiliation short="UC Berkeley ISchool"><a href="http://www.berkeley.edu/" title="University of California, Berkeley">UC Berkeley</a> <a href="http://ischool.berkeley.edu/" title="ISchool">School of Information</a></affiliation>
	<date short="Fall 2007">Fall Semester 2007</date>
	<copyright>2007 Erik Wilde</copyright>
	<style type="text/css" src="xslidy-fall07.css"/>
	<index name="index.html">
		<category element="xml" class="xml"/>
		<category element="elem" class="xml elem"/>
		<category element="xpathf" class="xpath"/>
		<category element="xpath" class="xpath"/>
		<category element="xslte" class="xslt elem"/>
		<category element="xslta" class="xslt"/>
		<category element="xslt" class="xslt"/>
		<category element="xq" class="xq"/>
		<category element="xsde" class="xsd elem"/>
		<category element="xsda" class="xsd"/>
		<category element="xsd" class="xsd"/>
		<category element="xsdtype" class="xsd xsdprefix"/>
		<category element="http" class="http"/>
	</index>
	<toc name="toc.html">
		<table rules="all" cellspacing="0" cellpadding="5" width="100%">
			<thead>
				<tr>
					<th>Date</th>
					<th>Subject</th>
					<th>Slides</th>
					<th>Resources</th>
				</tr>
			</thead>
			<tbody>
				<xslidy:for-each-presentation>
					<tr>
						<td align="right" valign="top"><xslidy:date/></td>
						<td valign="top"><b><xslidy:title/><span class="toggle">:</span></b> <span class="toggle"><span class="abstract"><xslidy:toc class="abstract"/></span></span></td>
						<td align="center"><xslidy:presentation-link title="Lecture Slides"><xslidy:title form="short"/></xslidy:presentation-link> <xslidy:slides>(*&#160;Slides)</xslidy:slides></td>
						<td align="center"><xslidy:toc class="resources"/></td>
					</tr>
				</xslidy:for-each-presentation>
			</tbody>
		</table>
	</toc>
	<toc name="242.xml">
		<course xmlns="urn:publicid:IDN+www.sims.berkeley.edu:schema:syllabusapp:syllabus:200404:en" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:publicid:IDN+www.sims.berkeley.edu:schema:syllabusapp:syllabus:200404:en syllabus_schema.xsd">
			<generalInformation>
				<title>XML Foundations</title>
				<units>2</units>
				<website>http://dret.net/lectures/xml-fall07/</website>
				<departmentListing>
					<name>SIMS</name>
					<code>INFO</code>
					<courseNumber>242</courseNumber>
				</departmentListing>
				<schedule>
					<year>2007</year>
					<semester>F</semester>
					<startDate>2007-08-28</startDate>
					<endDate>2007-12-06</endDate>
				</schedule>
				<teachingTeam>
					<teacher>
						<typeCode>Professor</typeCode>
						<initials>EW</initials>
						<name>
							<givenName>Erik</givenName>
							<familyName>Wilde</familyName>
						</name>
						<contact>
							<email>dret@berkeley.edu</email>
							<phone>
								<type>Office</type>
								<number>+1-510-6432253</number>
							</phone>
							<website>http://dret.net/netdret/</website>
						</contact>
					</teacher>
				</teachingTeam>
				<gradingOptionCode>LG</gradingOptionCode>
				<description>
					<p>Three hours of lecture, one hour of Laboratory per week. The Extensible Markup Language (XML), with its ability to define formal structural and semantic definitions for metadata and information models, is the key enabling technology for information services and document-centric business models that use the Internet and its family of protocols. This course introduces XML syntax, styles and transformations, and schema languages. It balances conceptual topics with practical skills for designing and implementing conceptual models as XML schemas.</p>
				</description>
			</generalInformation>
			<syllabus>
				<instructionFormatCode>LEC</instructionFormatCode>
				<dayPattern>
					<dayTime>
						<dayOfWeek>Tu</dayOfWeek>
						<timeSpan>
							<startTime>14:00:00</startTime>
							<endTime>15:30:00</endTime>
						</timeSpan>
					</dayTime>
					<dayTime>
						<dayOfWeek>Th</dayOfWeek>
						<timeSpan>
							<startTime>14:00:00</startTime>
							<endTime>15:30:00</endTime>
						</timeSpan>
					</dayTime>
				</dayPattern>
				<location>110 South Hall</location>
				<classes>
					<xslidy:for-each-presentation>
						<class>
							<title><xslidy:title/></title>
							<date><xslidy:date form="short"/></date>
							<xslidy:if-toc class="abstract"><description><xslidy:toc class="abstract"/></description></xslidy:if-toc>
							<resourceList>
								<resource>
									<title>Lecture Notes</title>
									<url><xslidy:presentation-link element="" prefix="http://dret.net/lectures/xml-fall07/"/></url>
								</resource>
								<xslidy:if-toc class="resources"><resource><comment><xslidy:toc class="resources"/></comment></resource></xslidy:if-toc>
							</resourceList>
						</class>
					</xslidy:for-each-presentation>
				</classes>
			</syllabus>
			<updated>
				<updateDate>Fall 2007</updateDate>
				<updateBy>dret</updateBy>
			</updated>
		</course>
	</toc>
	<presentation id="intro">
		<title short="Introduction">Overview and Introduction</title>
		<date>2007-08-28</date>
		<toc class="resources"><a href="http://www.w3.org/Press/1998/XML10-REC">XML 1.0 Press Release</a></toc>
		<toc class="abstract">The <em>Extensible Markup Language (XML)</em> was introduced in 1998 to enable content providers to publish their content on the Web in an application-specific format. HTML was considered to convey too little semantics, since its only purpose was (and is) the preparation of content for Web-based publishing. XML was the first step towards machine-readable data formats for the Web, a trend that since its invention has been taken to higher levels with the idea of the <em>Semantic Web</em>. XML appeared when the Web was in the steepest part of its success curve, and since then has taken over as the globally accepted format for the exchange of machine-readable structured data.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XML Executive Summary</title>
			<ul>
				<li>More and more value switches from goods to information</li>
				<li>Information sharing needs well-defined structures</li>
				<li>Business agility and flexibility are critical success factors</li>
				<li>Standardized formats prevent lock-in and incompatibilities</li>
				<li>XML is the most successful format for structured data</li>
				<li>XML technologies are widely used and universally available</li>
				<li>XML for B2B enables better workflow engineering</li>
				<li>XML for B2C is a good interface between B2B and Web interfaces</li>
				<li><em>XML is a mission-critical success factor for optimizing ROI and minimizing interoperability risks in today's fast-moving globalized fragmented business landscape …</em></li>
			</ul>
		</slide>
		<slide>
			<title>What's the Plan?</title>
			<ul>
				<li><link href="basics">XML Basics</link> and <link href="bestpractices">how to apply them</link></li>
				<li><link href="dtd">Describing classes of XML documents</link></li>
				<li><link href="xmlns">Combining different vocabularies of XML documents</link></li>
				<li><link href="xpath">Selecting parts of an XML document</link></li>
				<li><link href="xslt-1">Transforming XML into something else (or XML again)</link></li>
				<li><link href="xsdl-1">A more complicated way to describe classes of XML documents</link></li>
				<li><link href="schemalanguages">Even more ways of describing classes of XML documents</link></li>
				<li><link href="xquery-1">How does all of this relate to databases?</link></li>
				<li><link href="trends">What to expect as future developments</link></li>
			</ul>
		</slide>
		<slide>
			<title>What are we doing?</title>
			<img src="altova-partner.gif" style="float : right ; margin : 1em ; " href="http://www.altova.com/" title="Altova XML Spy"/>
			<ul>
				<li>Assignments</li>
				<ul>
					<li>blogs as the common theme (the perfect XML application example)</li>
					<li>how to create an XML document representing a blog</li>
					<li>how to write a schema describing this document's structure</li>
					<li>how to select parts of the blog (posts, titles, comments, …)</li>
					<li>how to transform blogs (into HTML, RSS, Atom, …)</li>
					<li>how to extract blog information from an XML database</li>
				</ul>
				<li>Tools</li>
				<ul>
					<li>XML editor such as <a href="http://www.altova.com/">Altova XML Spy</a> (XSLT and XQuery included)</li>
					<li>XSLT Processor such as <a href="http://www.saxonica.com/">Saxon</a></li>
					<li>XQuery Processor such as <a href="http://www.saxonica.com/">Saxon</a></li>
					<li>XML database such as <a href="http://www.marklogic.com/">MarkLogic</a> or <a href="http://exist.sourceforge.net/">eXist</a></li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Varia</title>
			<slide>
				<title>About Me</title>
				<ul>
					<li>Computer Science at <a href="http://www.tu-berlin.de/eng/">Technical University of Berlin (TUB)</a> (88-91)</li>
					<li>Ph.D. at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> (92-97)</li>
					<li>Post-Doc at <a href="http://www.icsi.berkeley.edu/" title="International Computer Science Institute">ICSI</a>, Berkeley (97/98)</li>
					<li>Various activities back in Switzerland (98-06)</li>
					<ul>
						<li>teaching at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> and <a href="http://www.fhnw.ch/">FHNW</a></li>
						<li>working as independent consultant (training, courses, consulting)</li>
						<li>research in <a href="http://dret.net/projects/">various XML-related areas</a></li>
					</ul>
					<li>Visiting Assistant Professor at the <a href="http://ischool.berkeley.edu/">School of Information</a> (since fall 2006)</li>
					<ul>
						<li>technical director of the <a href="http://isd.ischool.berkeley.edu/">Information and Service Design (ISD) program</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About this Course</title>
				<ul>
					<li>Course Web page: <code><a href="./">http://dret.net/lectures/xml-fall07/</a></code></li>
					<li>Course mailing list: subscribe at <code><a href="mailto:majordomo@ischool.berkeley.edu">majordomo@ischool.berkeley.edu</a></code></li>
					<ul>
						<li>no subject (leave blank)</li>
						<li>body of message: <code>subscribe i242</code></li>
					</ul>
					<li>Letter grade based on final exam (30' oral)</li>
				</ul>
			</slide>
			<slide>
				<title>About these Slides</title>
					<ul>
						<li>Generated from <a href="http://dret.net/projects/xslidy/">XSLidy</a> <a href="./xml-fall07.xml">XML</a></li>
						<ul>
							<li>all <a href="http://www.w3.org/Talks/Tools/Slidy/">Slidy</a> presentations are generated from this source</li>
							<li><code><a href="./242.xml">242.xml</a></code> for importing the syllabus into <a href="http://rosetta.sims.berkeley.edu:8085/sylvia/f07/view/242.complete">SylViA</a></li>
							<li><code><a href="./toc.html">toc.html</a></code> for displaying the summary on the <a href="./">course's Web page</a></li>
						</ul>
						<li>Designed for online presentation and use (lots of links!)</li>
						<ul>
							<li>for printing, use <q>a</q> (all slides), and then <q>s</q> (smaller font) a couple of times</li>
						</ul>
						<li>A good real-world example for XML applications</li>
						<ul>
							<li>XSLidy is useful, but there is no interface (XML editing only)</li>
							<li>SylViA is useful, but there is no interface (XML editing or XSLidy export)</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>Additional Resources</title>
				<ul>
					<li>My <a href="http://dret.net/glossary/">Online Glossary at <code>http://dret.net/glossary/</code></a></li>
						<ul>
							<li>suggestions, updates, corrections are very welcome (really!)</li>
							<li>XML-based and XSLT-generated HTML pages</li>
						</ul>
					<li>My <a href="http://dret.net/biblio/">bibliography at <code>http://dret.net/biblio/</code></a></li>
						<ul>
							<li>suggestions, updates, corrections are very welcome (really!)</li>
						</ul>
					<li>The <a href="http://www.w3.org/"><em>World Wide Web Consortium (W3C)</em></a></li>
					<ul>
						<li>the organization which invented XML</li>
						<li>as well as (almost) all other technologies covered in this course</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>What is XML?</title>
			<slide>
				<title>XML Yin &amp; Yang</title>
				<img src="yin-yang.png" style="float : right ; margin : 1em ; "/>
				<ul>
					<li>XML is …</li>
					<ul>
						<li>… great for exchanging trees (if this is what you want to do)</li>
						<li>… platform-independent (even your mobile phone processes XML)</li>
						<li>… a foundation for other technologies (some of which we will look at)</li>
					</ul>
				</ul>
				<ul>
					<li>XML is not …</li>
					<ul>
						<li>… a programming language (ever programmed comma-separated values?)</li>
						<li>… capturing semantics (without higher-layer consensus, XML is worthless)</li>
						<li>… ensuring interoperability (we both use bits! we can interoperate!)</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>What is XML Good for?</title>
				<slide>
					<title>Why Use XML?</title>
					<ul>
						<li>Because you want to share data</li>
						<ul>
							<li>share it in a format which is widely used and easy to use</li>
							<li>enable others to use it on various platforms with existing tools</li>
						</ul>
						<li>Because you want to share data cheaply</li>
						<ul>
							<li>it is easier to use XML than to invent something new</li>
							<li>it is even easier to use an existing XML schema than to invent a new one</li>
						</ul>
						<li>Because you want to share data openly</li>
						<ul>
							<li>if you invent new formats, people must process them</li>
							<li>avoid applying the <q>security through obscurity</q> principle inadvertently</li>
							<li>application-specific processing should be deferred to higher layers</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Is XML Self-Describing?</title>
					<ul>
						<li>XML is often said to be <q>self-describing</q></li>
						<ul>
							<li>many people think this is the same as <q>self-explanatory</q></li>
							<li>the catch is what exactly it is you refer to by <q>describing</q></li>
						</ul>
						<li>Database data cannot live without a database</li>
						<ul>
							<li>database data is simply content, the structure is provided by a DBMS</li>
							<li>XML documents have their structure encoded within them</li>
							<li>compared to database data, XML in fact is <q>self-describing</q></li>
						</ul>
						<li>What is the gap between <q>self-describing</q> and <q>self-explanatory</q>?</li>
						<ul>
							<li>it is impossible to find out how the document could be modified</li>
							<li>there are no semantics associated with either structure or content</li>
							<li>so <q>self-describing</q> means you can guess a lot, but you may be wrong</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>What is XML not Good for?</title>
				<slide>
					<title>XML is Character-Based</title>
					<ul>
						<li>XML is <u>not</u> a binary format, it is <link href="unicode">based on Unicode</link></li>
						<ul>
							<li><q>binary structures</q> cannot (or rather should not) be described using XML</li>
						</ul>
						<li>Multimedia formats often are binary</li>
						<ul>
							<li>image formats such as GIF, JPEG, and PNG</li>
							<li>audio formats such as MP3 and AAC</li>
							<li>video formats such as MPEG4 and H.264</li>
						</ul>
						<li>But: multimedia also uses many XML formats</li>
						<ul>
							<li>vector graphics formats such as <em>Scalable Vector Graphics (SVG)</em></li>
							<li><em>Synchronized Multimedia Integration Language (SMIL)</em> for describing presentations</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML is a Syntax for Trees</title>
					<ul>
						<li>Not all data is easily represented by trees</li>
						<ul>
							<li>overlapping markup (multiple <q>views</q> of the same content)</li>
							<li>graph-like structures which are less constrained than trees</li>
						</ul>
						<li>What is it that you have in your tree?</li>
						<ul>
							<li>XML encodes a structure purely on the syntactic level</li>
							<li>what the structures <u>mean</u> is in no way described by XML</li>
							<li>XML structures must be accompanied by semantic descriptions</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Usages</title>
					<ul>
						<li>XML can be used <link href="bestpractices">in different ways</link></li>
						<ul>
							<li>people should be able to use your XML directly using standard tools</li>
							<li>if they <em>absolutely need</em> a set of special tools, something is wrong</li>
						</ul>
						<li>XML is hip, so everybody wants to use it</li>
						<ul>
							<li>many things have been created ad-hoc and without much planning</li>
							<li>if you start something which is XML-based, use XML responsibly</li>
							<li>if you have to use some <q>bad XML</q>, complain about it</li>
						</ul>
						<li>Finding the balance can be hard</li>
						<ul>
							<li>XML is great for prototyping and experiments</li>
							<li>once you decide to redesign your XML, it may be too late</li>
							<li><em>XML documents</em> may be short-lived, <em>XML schemas</em> are definitely not</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Why XML?</title>
			<slide>
				<title>Web Technologies</title>
				<ul>
					<li>Early Web: URI+HTTP+HTML</li>
					<ul>
						<li>URIs identify resources (in a human-readable way)</li>
						<li>HTTP retrieves resources (using a simple protocol)</li>
						<li>HTML is the resource format (using a simple data format)</li>
					</ul>
					<li>The early Web was a distributed hypermedia system</li>
					<ul>
						<li>not designed by hypermedia researchers or companies</li>
						<li>simple enough to be adopted very fast</li>
					</ul>
					<li>The Web today uses many different technologies</li>
					<ul>
						<li>URI+HTTP+HTML for basic Web publishing</li>
						<li>CSS &amp; JavaScript (maybe even Ajax) for advanced publishing</li>
					</ul>
					<li>JavaScript &amp; XML (a.k.a. Ajax)</li>
					<ul>
						<li>scripts dynamically loading data from a server</li>
						<li>machine-to-machine interaction: the server and the script</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>From Humans to Machines</title>
				<ul>
					<li>The Web was designed for humans</li>
					<ul>
						<li>HTML is a language for describing page layout and links</li>
						<li>machines were only used for implementing it</li>
					</ul>
					<li>Search engines were the first machine users on the Web</li>
					<ul>
						<li>they made the Web's success possible</li>
						<li>they demonstrated how hard it is to <q>understand</q> HTML pages</li>
						<li>search engines are still a very active field of research</li>
					</ul>
					<li>A bigger Web needs more automation</li>
				</ul>
			</slide>
			<part>
				<title>Pre-XML Problems</title>
				<slide>
					<title>HTML is for Humans</title>
					<ul>
						<li>HTML is a format for <q>dead ends</q></li>
						<ul>
							<li>HTML is good for rendering Web pages</li>
							<li>HTML is bad for understanding Web pages</li>
							<li>the browser is a <q>dead end</q> (from a machine's point of view)</li>
						</ul>
						<li>Web growth in the late 90's was enormous</li>
						<ul>
							<li>everybody was putting information <q>online</q></li>
							<li>but this information was inaccessible for machines</li>
						</ul>
						<li>How can this information be made accessible to machines?</li>
						<ul>
							<li>HTML is not the right format (slightly better than fax machines)</li>
							<li>there was no other widely accepted format for structured data</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>A Machine-Friendly Web</title>
					<ul>
						<li>Information should be published in a machine-understandable format</li>
						<ul>
							<li>HTML is good for rendering Web pages</li>
							<li>HTML is bad for understanding Web pages</li>
							<li><q>understanding</q> is the key term here: <u>application</u> semantics!</li>
						</ul>
						<li>Information should be published in application-specific formats</li>
						<ul>
							<li>HTML is one application: Rendering documents for humans</li>
							<li>machines need other structures to process Web content</li>
						</ul>
						<li>1996: W3C Working Group <q>SGML on the Web</q></li>
						<ul>
							<li>HTML is just one document type defined with SGML</li>
							<li>SGML is a very complex and expensive technology</li>
							<li>how can SGML be made easily and widely usable?</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XML on the Web</title>
				<slide>
					<title>SGML, HTML, and XML</title>
					<ul>
						<li>Standard Generalized Markup Language (SGML)</li>
						<ul>
							<li>a language for designing <em>document types</em></li>
							<li>a very complex standard with many expensive and non-interoperable implementations</li>
						</ul>
						<li>Hypertext Markup Language (HTML)</li>
						<ul>
							<li>implements <a href="http://www.w3.org/TR/REC-html40/sgml/loosedtd.html">a simple SGML <em>document type</em></a></li>
							<li>its syntax is <a href="http://www.oasis-open.org/cover/sgmlsyn/sgmlsyn.htm">SGML syntax</a>, it is not defined by HTML itself</li>
							<li>uses very few SGML features, dedicated processors are rather easy to build</li>
						</ul>
						<li>Extensible Markup Language (XML)</li>
						<ul>
							<li>a language for designing <em>document types</em> (i.e., classes of documents)</li>
							<li>a greatly simplified version of SGML, omitting many obscure features</li>
							<li>a specification with <u>no optional parts!</u></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Documents on the Web</title>
					<ul>
						<li>XML's idea was that content should be published as XML</li>
						<ul>
							<li>stylesheets could then be used to render human-readable views</li>
							<li>machines could simply use the underlying XML</li>
						</ul>
						<li>There are (almost) no XML documents on the Web</li>
						<ul>
							<li>stylesheet support depends on browsers (software has a long life!)</li>
							<li>many content providers do not want to publish machine-readable data</li>
						</ul>
						<li>There are many XML documents behind HTML documents</li>
						<ul>
							<li>content does not have to be made public in a machine-readable way</li>
							<li>browser-independent HTML can be produced from XML</li>
							<li>XML technologies can be leveraged on the server-side</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Documents Elsewhere</title>
					<ul>
						<li>XML is not used as intended, but it is very successful</li>
						<ul>
							<li>as a server-side foundation for Web publishing</li>
							<li>as a B2B-focused format with no Web publishing in mind</li>
						</ul>
						<li>XML has been successful for several reasons</li>
						<ul>
							<li>being there at the right time (Internet bubble)</li>
							<li>politically correct (the W3C is OS-agnostic)</li>
							<li>technically sound (simple and no optional parts)</li>
							<li>human-readable based on a well-known syntax</li>
							<li>great for rapid prototyping and experiments</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XML Today</title>
				<slide>
					<title>Used Everywhere</title>
					<ul>
						<li>Very small: Messages from sensors</li>
						<ul>
							<li>e.g., building automation or car electronics</li>
							<li>mostly implemented in hardware or firmware</li>
						</ul>
						<li>Very large: Genome sequences</li>
						<ul>
							<li>encoding the results of genome analyses</li>
							<li>yields very large XML documents (several gigabytes)</li>
						</ul>
						<li>Very different processing requirements</li>
						<ul>
							<li>very fast processing (time critical applications)</li>
							<li>memory-conserving processing (very large documents)</li>
							<li>incremental processing (streaming)</li>
							<li>random access (only small parts required)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>This Course and XML</title>
					<ul>
						<li><q>XML is the ASCII for the 21<sup>st</sup> century</q></li>
						<ul>
							<li>information professionals should know and use XML</li>
							<li>you will see it in many projects</li>
							<li>you will hopefully use it in many projects</li>
							<li>you will be able to build and test prototypes very rapidly</li>
						</ul>
						<li>What do you need for using XML?</li>
						<ul>
							<li>XML and some kind of schema language</li>
							<li>XSLT for processing it</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Beyond XML</title>
			<slide>
				 <title>Sharing Concepts</title>
				 <ul>
					<li>XML is a syntax for trees</li>
					 <ul>
						<li>trees are just structured data</li>
						<li>for doing something useful, you must <em>understand the trees</em></li>
					 </ul>
					<li>Schema-based sharing of concepts is possible</li>
					 <ul>
						<li>HTML works great because everybody is using it</li>
						<li>anything beyond HTML's capabilities needs a new schema</li>
					 </ul>
					<li>General sharing of concepts is hard</li>
					 <ul>
						<li>the AI community tried for decades and failed</li>
						<li>micro-formats are a more humble approach to <q>reusable shared concepts</q></li>
						<li>agreement in communities gets exponentially harder with their size</li>
					 </ul>
				 </ul>
			</slide>
			<slide id="intro-semweb">
				<title>The Semantic Web</title>
				<ul>
					<li>Technologies for describing concepts</li>
					<ul>
						<li>the foundation of successful interaction is <em>mutual understanding</em></li>
						<li>describe your XML using Semantic Web technologies</li>
					</ul>
					<li>XML core technologies do not convey any meaning</li>
					<ul>
						<li>XML is a language for exchanging trees</li>
						<li>XML schema languages describe what trees may be exchanged</li>
						<li>XML schema languages are for <em>markup design</em></li>
					</ul>
					<li>Semantic Web technologies have received a lot of attention</li>
					<ul>
						<li>and a lot of research funding</li>
						<li>success for the most general approaches is questionable</li>
						<li>proven failure as demonstrated by <a href="http://technetcast.ddj.com/tnc_play_stream.html?stream_id=526">AI's failure</a></li>
						<li>modest approaches are much more promising and likely to succeed</li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="blogxml">
		<title>Blogging in XML</title>
		<date>2007-08-30</date>
		<toc class="resources"></toc>
		<toc class="abstract">XML is used in a wide variety of application scenarios, resulting in a wide variety of requirements. This lecture introduces the application example used in this course, which is the representation of blog data in XML. Blogs are a good example for XML, because of their mix of structured data (blog post metadata) and textual data (the actual blog post), the requirement to derive different views (such as weekly and monthly summaries) from the same set of data, and the requirement to make the data available in various output formats (such as HTML and RSS).</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="xmlblogging-xml">
			<title>BlogXML</title>
			<slide>
				<title>Blog Structures</title>
				<ul>
					<li>Blogs have a number of recurring features</li>
					<ul>
						<li>they are a sequentially ordered series of blog posts</li>
						<li>a blog has an owner and a permanent URI</li>
						<li>posts have a date and content</li>
						<li>content can be anything from plain text to complex HTML structures</li>
					</ul>
					<li>Blog posts can be regarded as individual documents</li>
					<ul>
						<li>the complete blog (the collection of all posts) is one <em>big compound document</em></li>
						<li>for advanced publishing, the blog is more useful than isolated posts</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title><code>dretblog.xml</code></title>
				<listing src="dretblog.xml"/>
			</slide>
		</part>
		<part id="xmlblogging-dtd">
			<title>Rules for BlogXML</title>
			<slide>
				<title>Structural Constraints</title>
				<listing src="blogxml.dtd"/>
			</slide>
			<slide>
				<title>Adding Datatype Constraints</title>
				<listing src="blogxml.xsd"/>
			</slide>
			<slide>
				<title>A Clearer View</title>
				<img style="width : 90% ; margin : 4% ;" src="blogxml-xsd.png" title="BlogXML XSDL"/>
			</slide>
			<slide>
				<title>Fewer Constraints</title>
				<img style="width : 90% ; margin : 4% ;" src="blogxml-xsd-unbounded.png" title="BlogXML XSDL (Repeatable Images)"/>
			</slide>
		</part>
		<part id="xmlblogging-xpath">
			<title>Selecting BlogXML Content</title>
			<slide>
				<title>Using XML Structures</title>
				<ul>
					<li>How many blog posts?</li>
					<pre>count(//post)</pre>
					<li>The title of the second post?</li>
					<pre>//post[2]/title</pre>
					<li>How many days after the preceding post?</li>
					<pre>for $i in //post return days-from-duration(xs:date($i/@date) - xs:date($i/preceding-sibling::post[1]/@date))</pre>
					<li>How many days before the last post?</li>
					<pre>for $i in //post return days-from-duration(xs:date(//post[last()]/@date) - xs:date($i/@date))</pre>
				</ul>
			</slide>
		</part>
		<part id="xmlblogging-html">
			<title>Publishing BlogXML</title>
			<slide>
				<title>Generating HTML from BlogXML</title>
				<listing src="blog2html.xsl"/>
			</slide>
			<slide>
				<title>One Page Blog</title>
				<listing src="dretblog.html" line="1-10"/>
			</slide>
			<slide>
				<title>Generating a Blog from BlogXML</title>
				<listing src="blog2html2.xsl"/>
			</slide>
			<slide>
				<title>Hyperlinked Blog</title>
				<listing src="dretblog2.html"/>
				<listing src="2007-05-15.html"/>
			</slide>
		</part>
		<part id="xmlblogging-atom">
			<title>Syndicating BlogXML</title>
			<slide>
				<title>Generating Atom from BlogXML</title>
				<listing src="blog2atom.xsl"/>
			</slide>
			<slide>
				<title>dretblog Atom Feed</title>
				<listing src="dretblog.atom" line="2-26"/>
			</slide>
		</part>
		<part id="xmlblogging-xdbms">
			<title>Managing BlogXML</title>
			<slide>
				<title>Files vs. Databases</title>
				<ul>
					<li>XML typically is managed in documents</li>
					<ul>
						<li>XML has its roots in the document processing area</li>
					</ul>
					<li>What is the best granularity for XML documents?</li>
					<ul>
						<li>each post as a document that is individually managed</li>
						<li>each blog as a document that is individually managed</li>
						<li>all blogs as one big document containing all data</li>
						<li>additional data such as users, groups, access rights, …</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xmlblogging-conclusions">
			<title>Conclusions</title>
			<slide>
				<title>XML Blogs!</title>
				<ul>
					<li>XML as the starting point for handling structured data</li>
					<li>Designing a data model (and the schema) is a key issue</li>
					<li>Working with XML is supported by various technologies</li>
					<li>Transformations produce new structures for data reuse</li>
					<li>Large amounts of XML data can be stored in XML databases</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="basics">
		<title short="Basics">XML Basics</title>
		<date>2007-09-04</date>
		<toc class="resources"><a href="http://www.w3.org/TR/REC-xml/" title="W3C XML 1.0 Specification">Spec</a></toc>
		<toc class="abstract">The <em>Extensible Markup Language (XML)</em> defines a simple way for structuring data. The power and popularity of XML can be explained by its versatility, the platform-independence, the standards and technologies leveraging it, and the number of tools and products supporting it. Understanding XML itself is rather simple, it only depends on a very small set of other technologies. Unicode and URIs are the most important foundations of XML. XML itself specifies two different things: on the one hand the format for structured data, which are called <em>XML documents</em>, and on the other hand a constraint language for XML documents, which is called <em>Document Type Definition (DTD)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Foundations for XML</title>
			<slide>
				<title>Identifications</title>
				<ul>
					<li>Identification of Character Encodings</li>
					<ul>
						<li>text can be encoded using different character sets and encodings</li>
						<li>IANA maintains the <a href="http://www.iana.org/assignments/character-sets">official list of character encodings</a></li>
						<li>character encoding is about <em>characters</em>, not about <em>text</em></li>
					</ul>
					<li>Identification of Languages</li>
					<ul>
						<li>textual content should be tagged with language information</li>
						<li>specification based on <a href="http://www.loc.gov/standards/iso639-2/langhome.html">ISO 639 language tags</a></li>
						<li>language identification is about <em>text</em>, not about <em>characters</em></li>
					</ul>
				</ul>
			</slide>
			<part id="unicode">
				<title>Unicode</title>
				<slide>
					<title>XML's Idea of Content and Names</title>
					<p>XML documents can use a wide array of characters. They are defined by <a href="http://www.unicode.org/">Unicode</a>, which currently (Version 5.0) defines more than 100'000 characters (#100'000 added in 2005).</p>
					<listing src="japanese1.xml"/>
					<listing src="japanese2.xml"/>
				</slide>
				<slide>
					<title>XML and Unicode</title>
					<ul>
						<li>XML is based on Unicode</li>
						<ul>
							<li>XML is defined in terms of <a href="http://www.w3.org/TR/xml/#sec-starttags">character structures</a></li>
							<li>how these characters are encoded is not part of XML</li>
						</ul>
						<li>How are XML documents encoded?</li>
						<ul>
							<li>applications can use any character encoding they like</li>
							<li>XML processors <em>must</em> support UTF-8 and UTF-16</li>
							<li>XML processors <em>may</em> support any number of additional encodings</li>
						</ul>
						<li>How is the encoding <q>encoded</q>?</li>
						<ul>
							<li>part of the XML document: <code>&lt;?xml version="1.0" encoding="UTF-8"?></code></li>
							<li>bootstrap problem solved heuristically or by out-of-band information</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="uri">
				<title>Uniform Resource Identifier (URI)</title>
				<slide>
					<title>Identifiers are Essential</title>
					<ul>
						<li><em>Uniform Resource Locator (URL)</em> is the old concept</li>
						<ul>
							<li>introduced to distinguish between <em>locating</em> and <em>naming</em></li>
							<li><em>locating</em> and <em>naming</em> are two ways of <em>identification</em></li>
							<li>URLs have been replaced by URIs, technically URLs do not exist anymore</li>
						</ul>
						<li>URIs identify resources</li>
						<ul>
							<li>some resources may be retrieved using a protocol: <code href="http://dret.net/netdret/">http://dret.net/netdret/</code></li>
							<li>not all resource access is retrieval: <code href="mailto:dret@berkeley.edu">mailto:dret@berkeley.edu</code></li>
							<li>sometimes computers are not required: <code href="tel:+1-510-6432253">tel:+1-510-6432253</code></li>
							<li>or resources cannot be located: <code href="urn:ietf:rfc:2648">urn:ietf:rfc:2648</code></li>
							<li>or location is the only means of identification: <code href="http://maps.google.com/maps?hl=en&amp;ie=UTF8&amp;om=1&amp;ll=27.988262,86.925277&amp;t=k">geo:27.988056;86.925278</code></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>XML</title>
			<slide>
				<title>XML Use Cases</title>
				<ul>
					<li>XML is a metalanguage supporting application-specific vocabularies</li>
					<li><em>RSS</em> (and <em>Atom</em>) are XML vocabularies for newsfeeds</li>
					<ul>
						<li><a href="http://docordie.blogspot.com/">Doc or Die</a>: <a href="http://docordie.blogspot.com/rss.xml">RSS feed</a> vs. <a href="http://docordie.blogspot.com/atom.xml">Atom feed</a></li>
						<li>browsers now incorporate and/or integrate newsfeed readers</li>
					</ul>
					<li><em>OpenDocument (ODF)</em> is a language for office application documents</li>
					<ul>
						<li>designed for open and interoperable exchange</li>
						<li>standardized by ISO (which now also standardizes Microsoft's <em>Open XML</em>)</li>
					</ul>
					<li><em>Scalable Vector Graphics (SVG)</em> for portable vector graphics</li>
					<ul>
						<li>designed for embedding in Web pages</li>
						<li>good example for compound documents: <a href="http://www.carto.net/papers/svg/animated_weather_symbols/">HTML containing SVG</a></li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>XML Documents</title>
				<slide>
					<title>Markup?</title>
					<ul>
						<li>Structures are encoded using special characters</li>
						<ul>
							<li>a fundamental difference when comparing to binary formats</li>
							<li>markup languages can be read and modified using text-based tools</li>
							<li>programs must treat markup characters in a special way</li>
						</ul>
						<li>Documents are content interspersed with markup (i.e., structures)</li>
						<ul>
							<li>XML-aware software interprets the markup</li>
							<li>XML-unaware software just sees a text file</li>
							<li>modifications must be made XML-aware (e.g., inserting <q>AT&amp;T</q> as <q>AT&amp;amp;T</q>)</li>
						</ul>
						<li>You have to pay the <link href="markup-price"/></li>
					</ul>
				</slide>
				<slide>
					<title>Basic Concepts</title>
					<ul>
						<li>XML Documents have an <em>XML declaration</em> (optional)</li>
						<li>There is exactly one <em>document element</em> (a.k.a. <em>root element</em>)</li>
						<li>Elements may be nested (there is no conceptual limit)</li>
						<ul>
							<li>elements may be repeated (they can be identified by position)</li>
						</ul>
						<li>Elements are marked up using <em>tags</em></li>
						<ul>
							<li>most elements have content, surrounded by <em>start</em> and <em>end tags</em></li>
							<li>empty elements are allowed and may use a special notation</li>
						</ul>
						<li>Elements may have attributes (zero to any number)</li>
						<ul>
							<li>attributes can only occur once on an element (i.e., they cannot be repeated)</li>
						</ul>
					</ul>
					<listing src="my-first.xml"/>
				</slide>
				<slide id="xmltree">
					<title>Tree Syntax</title>
					<ul>
						<li>Markup is important, but only a notation</li>
						<li>XML documents are trees with different node types</li>
						<ul>
							<li>nodes so far: document, element, attribute, text</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree.png" title="XML document tree"/>
					</ul>
				</slide>
				<slide id="xmlelements">
					<title>Elements</title>
					<ul>
						<li>Elements can use a <a href="http://www.w3.org/TR/xml/#NT-Name">wide variety of names</a></li>
						<ul>
							<li>Allowed: <elem>html</elem>, <elem>id9832798472</elem>, <elem>_</elem>, <elem>:</elem>, <elem>こんにちは</elem></li>
							<li>Disallowed: leading numbers, spaces, control characters</li>
						</ul>
						<li>Element names usually convey some information about the content</li>
						<ul>
							<li>this is not reliable and highly language-dependent</li>
							<li>it is <em>very useful</em> when working with a known vocabulary</li>
							<li>it is <em>potentially harmful</em> when working with an unknown vocabulary</li>
						</ul>
						<li>Elements are the foundation for XML's versatility</li>
						<ul>
							<li>they can be nested (<code>&lt;address>&lt;city>Berkeley&lt;/city>&lt;zip>94709&lt;/zip>…</code>)</li>
							<li>they can be repeated (<code>&lt;givenname>Erik&lt;/givenname>&lt;givenname>Thomas&lt;/givenname></code>)</li>
							<li>their sequence can convey additional information (given names have a sequence)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Attributes</title>
					<ul>
						<li>Additional information pertaining to elements</li>
						<li>Traditionally, anything that is not considered <q>content</q></li>
						<ul>
							<li>SGML is a document markup language</li>
							<li>XML uses SGML's concepts</li>
							<li>XML has its roots in the document world</li>
						</ul>
						<li>Elements: Content (i.e., Data); Attributes: Metadata</li>
						<li>Documents often distinguish by what is textual content</li>
					</ul>
					<listing src="section.xml" line="12-20"/>
				</slide>
				<slide>
					<title>Attribute Syntax</title>
					<ul>
						<li>Naming rules are the same as for <link href="xmlelements"/></li>
						<li>Attributes always appear within an element's <em>start tag</em></li>
						<li>Attributes are <a href="http://www.w3.org/TR/xml/#NT-Attribute">name/value-pairs</a></li>
						<ul>
							<li>the value is enclosed in single or double quotes</li>
						</ul>
						<li>Attribute with a single-quote value: <elem>elem attr="Single: '"/</elem></li>
						<li>Attribute with a double-quote value: <elem>elem attr='Double :"'/</elem></li>
						<li>How can attribute values contain both?</li>
					</ul>
				</slide>
				<slide id="markup-price">
					<title>The Price for Markup</title>
					<ul>
						<li>Markup characters have a special meaning</li>
						<ul>
							<li><q>&lt;</q> opens a tag</li>
							<li>for attribute values, quotes delimit the value</li>
						</ul>
						<li>The literal use of a markup character requires escaping</li>
						<ul>
							<li>XML's <em>entities</em> can refer to pieces of content</li>
							<li>entity syntax is <code>&amp;name;</code> for referring to the entity <q><code>name</code></q></li>
							<li>XML has 5 <a href="http://www.w3.org/TR/xml/#sec-predefined-ent">predefined entities</a>: <code>&amp;lt;</code>, <code>&amp;gt;</code>, <code>&amp;amp;</code>, <code>&amp;apos;</code>, <code>&amp;quot;</code></li>
						</ul>
						<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>
					</ul>
					<pre><![CDATA[<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>]]></pre>
				</slide>
				<slide id="mixed-content">
					<title>Mixed Content</title>
					<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smiley.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>
					<pre><![CDATA[<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smiley.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>]]></pre>
					<img style="width : 90% ; margin : 4% ;" src="mixed-content.png" title="XML tree for mixed content"/>
				</slide>
				<slide>
					<title>Mixed Content Usage</title>
					<ul>
						<li>Database people find mixed content irritating</li>
						<ul>
							<li>cannot be easily mapped to relational structures</li>
							<li>is more <em>document-like</em> than <em>data-like</em></li>
							<li>much harder to optimize for query analysis and query processing</li>
						</ul>
						<li>Document people find mixed content very intriguing</li>
						<ul>
							<li>textual content can still be used as simple text</li>
							<li>markup provides additional information for rich text</li>
							<li>start with a text-only document and use markup to add structure to it</li>
						</ul>
					</ul>
				</slide>
				<slide id="whitespace">
					<title>Whitespace</title>
					<ul>
						<li>XML documents often are pretty-printed</li>
						<li><em>Whitespace text nodes</em> often are <q>not really content</q></li>
						<ul>
							<li>XML whitespace characters are <em>space</em>, <em>tab</em>, <em>newline</em>, and <em>carriage return</em></li>
							<li>whitespace text nodes are text nodes containing <em>only</em> whitespace characters</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree-whitespace.png" title="XML tree with whitespace text nodes"/>
					</ul>
				</slide>
				<slide>
					<title>Significant Whitespace</title>
					<ul>
						<li>Some whitespace text nodes are relevant</li>
						<li>Usually text nodes in <em>mixed content</em> elements</li>
					</ul>
					<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>
					<pre><![CDATA[<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>]]></pre>
					<img style="height : 40% ; margin : 2% ;" src="significant-whitespace.png" title="XML tree containing significant whitespace"/>
				</slide>
			</part>
			<part id="wellformed">
				<title>Processing XML</title>
				<slide>
					<title>Observing XML Syntax</title>
					<ul>
						<li>XML's syntax requires you to use the right characters</li>
						<ul>
							<li><a href="http://www.w3.org/TR/xml/#NT-element">the grammar alone</a> allows many XML errors</li>
							<li><a href="http://www.w3.org/TR/xml/#GIMatch">additional constraints</a> ensure that everything is used correctly</li>
						</ul>
						<li><em>XML processors</em> (a.k.a. <em>XML parsers</em>) check for these rules</li>
						<ul>
							<li>if there are problems, the document cannot be interpreted as XML</li>
							<li>otherwise, the document is said to be <em>well-formed</em></li>
						</ul>
						<li>Only well-formed documents can be regarded as a tree</li>
						<ul>
							<li>other documents are not XML at all, even though they may be close</li>
							<li>XML processors must report problems to the application (no <em>silent recovery</em>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Validity</title>
					<ul>
						<li><em>Well-formed documents</em> observe XML rules</li>
						<ul>
							<li>they observe the XML syntax</li>
							<li>they observe all well-formedness constraints</li>
						</ul>
						<li>Applications require the right elements and attributes</li>
						<li><em>Validity</em> is a more comprehensive concept</li>
						<li><em>Valid documents</em> observe additional rules</li>
						<ul>
							<li>they must be well-formed documents</li>
							<li>they must adhere to the constraints defined in a <link href="dtd"/></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Semantics</title>
					<ul>
						<li>XML is a language for encoding trees</li>
						<ul>
							<li>elements and attributes are labeled nodes in this tree</li>
							<li>the labels can be chosen freely by document authors</li>
						</ul>
						<li>The tree's meaning is nothing XML is concerned with</li>
						<ul>
							<li>peers must have a mutual understanding of the semantics</li>
							<li>XML without mutual understanding is almost useless</li>
							<li>reverse engineering often is possible, but it is risky and brittle</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML Documents</title>
				<ul>
					<li>XML documents are structured data using markup</li>
					<li>Elements and Attributes are the main structuring mechanisms</li>
					<li>Elements and Attributes have names, but have no inherent semantics</li>
					<li>For using XML successfully, <em>shared semantics</em> are essential</li>
					<li><a href="a/1/">Assignment 1</a> asks you to think about semantics</li>
				</ul>
			</slide>
		</part>		
	</presentation>
	<presentation id="processing">
		<title>Processing XML</title>
		<date>2007-09-06</date>
		<toc class="resources"><a href="http://www.w3.org/DOM/" title="W3C DOM Home">DOM</a>&#160;· <a href="http://sax.sourceforge.net/">SAX</a></toc>
		<toc class="abstract">XML is a format for structured data, but it does not prescribe any way of processing these structures. In practice, XML data has to be processed by using XML-specific support in some programming environment. In this lecture, the most popular ways of processing XML data are discussed; the <em>Document Object Model (DOM)</em> as a tree-based data model, the <em>Simple API for XML (SAX)</em> as an event-based programming model, and <em>XSL Transformations (XSLT)</em> as a dedicated programming language for transforming XML.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
        <part id="xml-processing">
			<title>Processing XML</title>
			<slide>
				<title>XML and Programming</title>
				<ul>
					<li>XML is a format for structured data</li>
					<ul>
						<li>trees do not map very well to most programming languages</li>
						<li>for working with XML, some mapping into the language is required</li>
					</ul>
					<li>There are two basic approaches for programming with XML:</li>
					<ol>
						<li>use special functions to work on XML documents as external data objects</li>
						<li>map XML documents to native data structures of the programming language</li>
					</ol>
					<li>A third approach is to have an <q>XML programming language</q></li>
					<ul>
						<li><link href="xslt-1">XSLT</link> is an example for an XML programming language</li>
						<li><link href="xsdl-1">XSDL</link> and <link href="xpath">XPath</link> become an integral part of Java in <a href="http://www.research.ibm.com/xj/">XJ</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML and Programming Languages</title>
				<ul>
					<li>Most programming languages do not support XML natively</li>
					<ul>
						<li>a certain impedance mismatch between both models is unavoidable</li>
					</ul>
					<li>Function libraries (or their equivalent) can provide XML processing facilities</li>
					<ul>
						<li><link href="sax">SAX</link> as an event-based API for accessing XML documents</li>
						<li><link href="dom">DOM</link> as a tree-based API for accessing XML documents</li>
					</ul>
					<li>Mapping between XML and the programming language can take two forms</li>
					<ul>
						<li>using hand-crafted code (based on XML functions) that performs the mapping</li>
						<li>generating code using an XML schema and/or target data structures in the language</li>
					</ul>
					<li>Generating mapping code can be done in two ways</li>
					<ul>
						<li>using a generic <link href="databinding"/> framework for the mapping</li>
						<li>using hand-crafted code that can be better tailored to the schemas</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Typical XML &amp; Programming Problem</title>
				<ul>
					<li><a href="../web-fall07/ajax">Asynchronous JavaScript and XML (Ajax)</a> is based on HTTP &amp; XML</li>
					<ul>
						<li>JavaScript code can communicate with the server using <code href="http://www.w3.org/TR/XMLHttpRequest/" title="W3C XMLHttpRequest Spec">XMLHttpRequest</code></li>
						<li>in theory, the server sends XML data which is processed by the script</li>
					</ul>
					<li>XML parsing and processing is inconvenient in JavaScript</li>
					<ul>
						<li>there is an impedance mismatch between JavaScript and XML</li>
						<li>if the client is slow and the XML is big, parsing can be time-consuming</li>
						<li>if all clients are JavaScript, sending XML is not really necessary</li>
					</ul>
					<li><em>JavaScript Object Notation (JSON)</em> is a JavaScript-centric data model</li>
					<ul>
						<li>JavaScript code can directly instantiate JSON structures as runtime objects</li>
						<li>any non-JavaScript client (if there are any) will have to use JSON as well</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part id="sax">
			<title>Simple API for XML (SAX)</title>
			<slide>
				<title>Lightweight XML Processing</title>
				<ul>
					<li>SAX is an event-based API for accessing XML documents</li>
					<li>SAX allows users to use event handlers for parsing-related events</li>
					<ul>
						<li>the parser reads a document and recognizes markup structures</li>
						<li>for each recognized structure, a user-supplied function can be called</li>
					</ul>
					<li>SAX parsing requires little memory and can handle very large documents</li>
					<ul>
						<li>the breadth of the XML document tree is irrelevant to SAX parsing</li>
						<li>the depth of the tree is relevant for checking for well-formed documents</li>
					</ul>
					<li>SAX parsing does not allow random access or backward movement</li>
					<ul>
						<li>saving context and history is something the application has to manage</li>
						<li>at a certain complexity, SAX parsing requires a lot of additional code</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>SAX Parser</title>
				<img style="width : 90% ; margin : 2% ; " src="sax-parser.png" title="SAX Parser"/>
			</slide>
        </part>
        <part id="dom">
			<title>Document Object Model (DOM)</title>
			<slide>
				<title>XML Trees Everywhere</title>
				<ul>
					<li>DOM is a tree-based API for accessing XML documents</li>
					<ul>
						<li>the specification uses a <a href="http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html">language-independent <em>Interface Definition Language (IDL)</em></a></li>
						<li><q>language bindings</q> map IDL to specific languages such as <a href="http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/java-binding.html">Java</a> or <a href="http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/ecma-script-binding.html">JavaScript</a></li>
					</ul>
					<li>DOM is based on an in-memory representation of an XML document</li>
					<ul>
						<li>random document access using <a href="http://www.w3.org/TR/DOM-Level-3-Core/core.html#ID-1950641247">the tree's node structure</a></li>
						<li>more specific tasks such as <a href="http://www.w3.org/TR/DOM-Level-3-Core/core.html#ID-217A91B8">getting an element's attribute by name</a></li>
					</ul>
					<li>DOM parsers have an additional layer for building the tree</li>
					<ul>
						<li>an underlying SAX parser reports structures for tree building</li>
						<li>the memory representation is heavily interlinked (requiring substantial memory)</li>
						<li>DOM calls query or modify the memory representation of the tree</li>
					</ul>
					<li>DOM processing is not appropriate for all tasks</li>
					<ul>
						<li>very large documents may not fit into memory (risk of <em>thrashing</em>)</li>
						<li>for isolated tasks, the parsing overhead is prohibitive</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>DOM Parser</title>
				<img style="width : 90% ; margin : 2% ; " src="dom-parser.png" title="DOM Parser"/>
			</slide>
			<slide id="jdom">
				<title>JDOM</title>
				<ul>
					<li>DOM is not optimized for a specific programming language</li>
					<ul>
						<li>DOM knowledge can be easily transferred between programming languages</li>
						<li>programming with DOM in a given language often is not very convenient</li>
					</ul>
					<li>JDOM is a Java-specific version of a tree-based XML API</li>
					<ul>
						<li>represents the same concepts as DOM (XML structures)</li>
						<li>represents XML concepts <a href="http://www-128.ibm.com/developerworks/java/library/j-jdom/#h2">in a more Java-friendly way</a></li>
						<li>JDOM has no relationship with the W3C's DOM API</li>
					</ul>
					<li>JDOM can be built on top of almost any parser</li>
					<ul>
						<li>SAX is a pretty common choice for a foundation for JDOM</li>
						<li>SAX events are then used to build the JDOM tree</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part id="databinding">
			<title>XML Data Binding</title>
			<slide>
				<title>Mapping XML into Languages</title>
				<ul>
					<li>XML data binding connects XML with language-specific structures</li>
					<ul>
						<li>for OO languages this often means mapping schemas and classes</li>
						<li>code for serialization and deserialization can then be generated</li>
					</ul>
					<li>Typical problems of data binding are schema changes</li>
					<ul>
						<li>if the schema is updated, can the code be migrated easily?</li>
						<li>can instances of different versions be handled by the same code?</li>
						<li>most data binding frameworks do not fully support XSDL anyway</li>
					</ul>
					<li>Several XML data binding frameworks are in widespread use</li>
					<ul>
						<li><a href="https://jaxb.dev.java.net/">Java Architecture for XML Binding (JAXB)</a></li>
						<li>Castor, another Java-based data binding framework</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part id="xslt-intro">
			<title>XSL Transformations (XSLT)</title>
			<slide>
				<title>An XML Programming Language</title>
				<ul>
					<li>XSLT is not practical as a general-purpose programming language</li>
					<ul>
						<li>input and output are limited to handling documents (XML and plain text)</li>
						<li>system programming is not part of the language model</li>
					</ul>
					<li>XSLT is a very natural choice for XML-centric tasks</li>
					<ul>
						<li>XML is the data model of XSLT (technically, it now is <link href="xdm">XDM</link>)</li>
						<li>simple values use <link href="xsdl-1">XSDL</link>'s <link href="xsdl-simple-types"/></li>
						<li>structured values are XML trees</li>
					</ul>
					<li>XSLT and XML data binding do not always work well together</li>
					<ul>
						<li>XML data binding often is regarded as <q>just a serialization</q> of language structures</li>
						<li>in these cases, the XML is hard to use outside of the language where it originated</li>
						<li>these scenarios are a telling sign for <em>poor use of XML</em></li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Document Engineering</title>
				<ul>
					<li>Documents are more important than programs</li>
					<li>Programs must be able to easily work with documents</li>
					<li>XML APIs make XML structures available for programs</li>
					<li>XML data binding maps XML to language structures</li>
					<li>XSLT uses XML as its native data model</li>
				</ul>
			</slide>
			<slide>
				<title>Assignment 2</title>
				<ul>
					<li>Implement <a href="a/1/">Assignment 1</a> using simple XML structures</li>
					<li><a href="a/2/">Assignment 2</a> asks for a number of sample entries</li>
					<li>Conceptual models can be represented in XML in many different ways</li>
				</ul>
			</slide>
        </part>
	</presentation>
	<presentation id="dtd">
		<title short="DTD">Document Type Definition (DTD)</title>
		<date>2007-09-11</date>
		<toc class="resources"><a href="xml-quickref.pdf">XML QuickRef</a></toc>
		<toc class="abstract">The XML specification defines a format for structured data (XML documents) and a grammar-based constraint language for these (DTD). In SGML-based systems, DTDs were often very complex and feature-rich constructs, which controlled a lot of the processing of SGML documents. XML greatly simplified DTDs, and de-facto usage of DTDs today simplified them even more. In many systems today, DTDs are not used at all or generated from sample documents. In this lecture, it is argued that DTDs (or schemas, to be more general) should be taken seriously in any non-trivial XML application, because they are a representation of the underlying (and often underspecified) data model of the application.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Schema Languages</title>
			<slide>
				<title>XML Validation</title>
				<ul>
					<li>XML knows two <q>states</q> of documents, <em>well-formed</em> and <em>valid</em></li>
					<li><em>well-formed</em> documents satisfy all basic constraints of the XML specification</li>
					<ul>
						<li>they can be parsed according to the XML grammar</li>
						<li>they satisfy the additional constraints (e.g., start and end tags match)</li>
						<li>together, this means they can be translated into a <link href="xmltree">tree</link></li>
					</ul>
					<li><em>valid</em> documents have been validated against a DTD</li>
					<ul>
						<li>a document must be well-formed before it can be validated</li>
						<li>all elements and attributes have been defined</li>
						<li>elements and attributes are used according to their definition</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Validation and Applications</title>
				<img src="valid-documents.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
			<slide>
				<title>Non-XML, Well-Formed, and Valid</title>
				<listing src="non.xml" line="3-9"/>
				<listing src="address-invalid.xml" line="3-9"/>
				<listing src="address-valid.xml" line="3-9"/>
			</slide>
			<slide>
				<title>DTD Example</title>
				<listing src="address-valid.xml" line="1-2"/>
				<listing src="address.dtd"/>
				<ul>
					<li>The DTD defines constraints on element and attribute usage</li>
					<li>The DTD only partially constrains textual content</li>
				</ul>
			</slide>
			<slide>
				<title>XML Schema Languages</title>
				<ul>
					<li>DTDs are part of XML itself</li>
					<ul>
						<li>XML specifies the document format <u>and</u> one schema language</li>
						<li>DTD support is provided by most XML processors (<a href="http://www.w3.org/TR/REC-xml/#proc-types" title="XML specification">validating processors</a>)</li>
					</ul>
					<li>Other schema languages are available</li>
					<ul>
						<li><link href="xsdl-1">XSDL</link> as the W3C's recommendation</li>
						<li><link href="schematron"/> as a rule-based alternative</li>
						<li>various <a href="http://dret.net/glossary/xmlschemalanguage" title="XML glossary">other research projects and products</a></li>
					</ul>
					<li>Choosing appropriate schema language(s) is important</li>
					<ul>
						<li>we look at DTDs because they are part of XML itself</li>
						<li>we look at XSDL because it is widely used</li>
						<li>we look at Schematron because it is simple and powerful</li>
						<li>you may even invent your own schema language (a.k.a. <em>meta-programming</em>)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>DTD Basics</title>
			<slide>
				<title>XML is SGML light</title>
				<ul>
					<li>XML is a subset of SGML</li>
					<ul>
						<li>XML documents have been greatly simplified</li>
						<li>XML DTDs have retained more of SGML's peculiarities</li>
					</ul>
					<li>DTD design should be left to XML experts</li>
					<ul>
						<li>simple DTDs (for prototypes) are easy to define (or generate)</li>
						<li>serious DTDs for complex data models are hard to define</li>
					</ul>
					<li>XML is a useful tool for experiments and prototypes</li>
					<ul>
						<li>basic knowledge of DTDs is required</li>
						<li>serious XML schemas often use <link href="xsdl-1">XSDL</link> anyway</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Connecting Documents and DTDs</title>
				<ul>
					<li>A DTD is a schema for a set of documents</li>
					<ul>
						<li>there may be just one document for a DTD, there may be billions (HTML)</li>
						<li>in most cases, DTDs are managed as a separate resource</li>
					</ul>
					<li>The <a href="http://www.w3.org/TR/xml#sec-prolog-dtd"><em>Document Type Declaration</em></a> <q>contains or points to markup declarations that provide a grammar for a class of documents</q></li>
					<ul>
						<li>the part which is contained is called <em>Internal Subset</em></li>
						<li>the part which is pointed to is called <em>External Subset</em></li>
						<li>internal and external subset together are the <em>Document Type Definition (DTD)</em></li>
					</ul>
					<li>External subsets are identified by <em>Public</em> and <em>System Identifiers</em></li>
					<ul>
						<li><em>public identifiers</em> use a special notation</li>
						<li><em>system identifiers</em> are URIs (relative or absolute)</li>
						<li>applications use (i.e., know or retrieve) the DTD for validation</li>
					</ul>
				</ul>
				<listing src="address-valid.xml" line="1-2"/>
			</slide>
			<part>
				<title>DTD Syntax</title>
				<slide>
					<title>DTDs are not XML Documents</title>
					<ul>
						<li>DTDs use a special syntax</li>
						<ul>
							<li>somewhat ironic when everything else is XMLized</li>
							<li>DTDs cannot be processed with standard XML tools</li>
							<li>more compact than XML syntax</li>
						</ul>
						<li>Definition of elements and attribute lists</li>
						<ul>
							<li>elements are defined by the content they allow</li>
							<li>attribute lists are sets of allowed attributes on elements</li>
						</ul>
					</ul>
					<listing src="address.dtd"/>
				</slide>
				<slide>
					<title>Syntax Rules</title>
					<ul>
						<li>There is no container containing the whole DTD</li>
						<ul>
							<li><code>&lt;!ELEMENT xml EMPTY></code> thus is a complete DTD</li>
						</ul>
						<li>Definitions (officially called <em>declarations</em>) use <code>&lt;!… ></code> syntax</li>
						<ul>
							<li><code>ELEMENT</code> is used to <link href="dtd-element">define an element</link></li>
							<li><code>ATTLIST</code> is used to <link href="dtd-attlist">define an attribute list</link></li>
							<li><code>ENTITY</code> is used to <link href="dtd-entity">define an entity</link></li>
						</ul>
						<li>The document element is not marked explicitly</li>
						<ul>
							<li>but it must be declared in the document type declaration</li>
							<li>this means the document element is defined by the document, not by the DTD</li>
						</ul>
					</ul>
				<listing src="address-valid.xml" line="1-3"/>
				</slide>
			</part>
			<part id="dtd-element">
				<title>Defining Elements</title>
				<slide id="element-only-declaration">
					<title>Element Only Content</title>
					<ul>
						<li>Element content is defined by a grammar for the children</li>
						<ul>
							<li>sequences are indicated with a comma: <q><code>,</code></q></li>
							<li>choices are indicated with a vertical bar: <q><code>|</code></q></li>
							<li>optional parts are indicated with a question mark: <q><code>?</code></q></li>
							<li>repeatable parts are indicated with a plus: <q><code>+</code></q></li>
							<li>optional and repeatable parts are indicated with an asterisk: <q><code>*</code></q></li>
							<li>parentheses can be used for grouping and nesting</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="1064-1074"/>
				</slide>
				<slide id="mixed-content-declaration">
					<title>Mixed Content</title>
					<ul>
						<li><link href="mixed-content"/> allows text content and elements to be mixed</li>
						<ul>
							<li><link href="whitespace"/> characters are allowed in <link href="element-only-declaration"/> (this need not be declared)</li>
							<li>for non-whitespace characters, character data must be allowed explicitly</li>
						</ul>
						<li>The allowed child elements may be constrained, but not their order or their number of occurrences</li>
						<li>Mixed Content always is defined as <code>&lt;!ELEMENT x (#PCDATA | a | b | …)* ></code></li>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="568-568"/>
					<ul>
						<li><em>Character only</em> content is a special case of mixed content</li>
						<ul>
							<li>the element may only contain characters (no other elements)</li>
							<li>the repetition is not necessary because there is no choice</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="355-355"/>
				</slide>
				<slide>
					<title>Empty Content</title>
					<ul>
						<li>Empty elements can be useful</li>
						<ul>
							<li>they may contain all information in attributes</li>
							<li>their presence may carry semantics without the need for additional information</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="833-848"/>
				</slide>
			</part>
			<part id="dtd-attlist">
				<title>Defining Attribute Lists</title>
				<slide>
					<title>Attributes belong to Elements</title>
					<ul>
						<li>Attributes are specified in an element's <em>Attribute List</em></li>
						<ul>
							<li>an element definition may have any number of attributes associated with it</li>
							<li>attributes may occur at most once on an element</li>
						</ul>
						<li>Attribute definitions have a <em>name</em>, a <em>type</em>, and a <em>default declaration</em></li>
						<ul>
							<li>the attribute appears according to the default declaration</li>
							<li>if the attribute is present, its value must conform to the type</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="794-801"/>
				</slide>
				<slide id="dtd-attr-type">
					<title>Attribute Types</title>
					<ul>
						<li>Attribute values can be constrained (which is not possible for element content)</li>
						<ul>
							<li><code>CDATA</code> means any character string (but no markup)</li>
							<li>enumerated types list allowed values: <code>(data|ref|object)</code> (list of XML names)</li>
							<li><code>ID</code> for identifying elements (part of <code><link href="ididref"/></code>)</li>
							<li><code>IDREF</code> for referencing identified elements (part of <code><link href="ididref"/></code>)</li>
						</ul>
						<li>Application-oriented attribute types are often <q>simulated</q></li>
						<ul>
							<li>using <link href="param-entity"/>, modeling information can be preserved</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="894-894"/>
					<listing src="xhtml1-transitional.dtd" line="52-53"/>
					<ul>
						<li>The default declaration specifies the attribute's presence</li>
						<ul>
							<li><code>#REQUIRED</code> means the attribute has to be specified (on every element)</li>
							<li><code>#IMPLIED</code> marks an optional attribute (the parser may imply a value)</li>
							<li><code>"…"</code> specifies a default value (and the attribute is optional)</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Advanced DTDs</title>
			<part id="ididref">
				<title>ID/IDREF</title>
				<slide>
					<title>References in Documents</title>
					<ul>
						<li>Without Validation, there are no IDs</li>
						<ul>
							<li><code>ID</code> is an <link href="dtd-attr-type">attribute type</link> declared in the DTD</li>
							<li><code>xml:id</code> is an attempt to support schema-independent IDs</li>
						</ul>
						<li>IDs are used to assign identities to elements</li>
						<ul>
							<li>the XML processor reports duplicate IDs as errors (<a href="http://www.w3.org/TR/xml/#id">part of validation</a>)</li>
						</ul>
						<li>IDREFs are used to reference existing IDs</li>
						<ul>
							<li>the XML processor reports references to non-existing IDs as errors (<a href="http://www.w3.org/TR/xml/#idref">part of validation</a>)</li>
						</ul>
						<li>IDs must be XML Names (in particular, they may not start with a number)</li>
					</ul>
				</slide>
				<slide>
					<title>ID/IDREF in a Document</title>
					<listing src="section.xml" line="3-18"/>
					<listing src="section.dtd" line="2-12"/>
				</slide>
				<slide>
					<title>References within the Tree</title>
					<img src="section.png" style="width : 90% ; margin : 4% ; "/>
				</slide>
				<slide>
					<title>Formatting Example</title>
					<p>XSLidy can generate links to sections such as the section about <link href="ididref"/>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>
					<pre><![CDATA[<p>XSLidy can generate links to sections such as the section about <link href="ididref"/>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>]]></pre>
					<p>After running XSLidy, the following HTML is generated:</p>
					<pre><![CDATA[<p>XSLidy can generate links to sections such as the section about <a href="#(23)">ID/IDREF</a>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>]]></pre>
				</slide>
				<slide>
					<title>ID/IDREF Semantics</title>
					<ul>
						<li>Rooted in the document world</li>
						<ul>
							<li>all parts are assembled before processing</li>
							<li>names are symbolic and assigned as required</li>
							<li>mixed syntax and semantics</li>
						</ul>
						<li>Good idea, but many shortcomings</li>
						<ul>
							<li>constraints apply to one document only</li>
							<li>IDs and IDREFs are global instead of scoped</li>
							<li>identifiers should be allowed to use any type</li>
							<li>identifier processing should be type-specific (2 &#x225F; 02)</li>
						</ul>
						<li>Applications must know how to process ID/IDREF</li>
						<ul>
							<li>for HTML export, links can be generated</li>
							<li>for databases, keys should be used</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="dtd-entity">
				<title>Entities</title>
				<slide>
					<title>General Entities</title>
					<ul>
						<li>XML's core concept of physical data structures</li>
						<ul>
							<li>an entity is a named unit of data which can be referenced</li>
							<li>within documents, it is referenced by the markup <code>&amp;entity-name;</code></li>
						</ul>
						<li>Entities can be used to name and reuse document content</li>
					</ul>
					<listing src="xhtml-lat1.ent" line="135-142"/>
					<ul>
						<li><em>Character References</em> look like entities: <code>&amp;#9786;</code> or <code>&amp;#x263A;</code> = &#x263A;</li>
						<ul>
							<li>they can be used to represent any Unicode character, they are processed as single characters</li>
						</ul>
					</ul>
				</slide>
				<slide id="param-entity">
					<title>Parameter Entities</title>
					<ul>
						<li>Parameter entities are parsed entities for use within the DTD</li>
						<ul>
							<li>a parameter entity must be specifically declared as such</li>
							<li>within DTDs, it is referenced by the markup <code>%entity-name;</code></li>
							<li>outside of DTDs, parameter entities cannot be used</li>
						</ul>
						<li>Like general entities, parameter entities are meant for reuse</li>
						<ul>
							<li>in a DTD, reuse is mostly about reusing structures</li>
							<li>parameter entities are DTDs' <q>duct tape</q>: not elegant, but effective</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XHTML Parameter Entities (Attributes)</title>
					<listing src="xhtml1-transitional.dtd" line="433-437"/>
					<listing src="xhtml1-transitional.dtd" line="188-188"/>
					<listing src="xhtml1-transitional.dtd" line="133-138"/>
					<listing src="xhtml1-transitional.dtd" line="145-149"/>
					<listing src="xhtml1-transitional.dtd" line="55-56"/>
					<listing src="xhtml1-transitional.dtd" line="193-193"/>
				</slide>
				<slide>
					<title>XHTML Parameter Entities (Content)</title>
					<listing src="xhtml1-transitional.dtd" line="433-437"/>
					<listing src="xhtml1-transitional.dtd" line="230-230"/>
					<listing src="xhtml1-transitional.dtd" line="227-227"/>
					<listing src="xhtml1-transitional.dtd" line="203-204"/>
					<listing src="xhtml1-transitional.dtd" line="200-201"/>
					<listing src="xhtml1-transitional.dtd" line="197-198"/>
					<listing src="xhtml1-transitional.dtd" line="222-222"/>
				</slide>
			</part>
		</part>
		<part>
			<title>More Advanced DTDs</title>
			<slide>
				<title>Additional Mechanisms</title>
				<ul>
					<li>DTDs have more advanced mechanisms</li>
					<ul>
						<li>used in few applications, mostly by SGML veterans</li>
						<li>should not be used in new projects</li>
					</ul>
					<li><em>Conditional Sections</em> for configurable DTDs</li>
					<ul>
						<li>parts of a DTD can be enclosed in special constructs</li>
						<li>based on parameter entity setting, these parts can be switched <q>on</q> or <q>off</q></li>
					</ul>
					<li><em>External Entities</em> for referencing external resources</li>
					<ul>
						<li><em>parsed entities</em> contain content parsed by the XML processor</li>
						<li>inclusion should be done with <em>XInclude</em></li>
						<li><em>unparsed entities</em> contain non-XML content (e.g., images or plain text)</li>
						<li>referring to non-XML content is handled on the application level</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>DTD for XML Schemas</title>
				<ul>
					<li>XML documents are processed by applications</li>
					<li>Applications have assumptions about XML documents</li>
					<li>DTDs make it possible to formalize some of these constraints</li>
					<li>Part of the constraint checking must still be programmed</li>
				</ul>
			</slide>
			<slide>
				<title>Modeling DTDs</title>
				<ul>
					<li>Data models can be mapped to many different DTDs</li>
					<li>What is a good DTD? What is a bad DTD?</li>
					<li>How does the DTD affect further processing?</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="bestpractices">
		<title short="Best Practices">The Good, the Bad, and the Ugly</title>
		<date>2007-09-13</date>
		<toc class="resources"><a href="http://dret.net/netdret/docs/wilde-elpub2006-xml.pdf">Structuring Content with XML</a>&#160;· <a href="http://www.tbray.org/ongoing/When/200x/2006/01/09/On-XML-Language-Design">On XML Language Design</a></toc>
		<toc class="abstract">While XML is rather easy to understand and use, it is also rather easy to use XML in ways which either produce <q>ugly</q> XML, or which may lead to problems in components further processing the XML. The topic of this lecture thus is to look at design guidelines for XML schemas, leading to <q>good</q> XML. Some of the simpler topics cover basic questions of how to map a data model to XML markup (e.g., when to use elements or attributes). The next question is how data should be represented in XML so that applications can process it efficiently. We also look at what part of the markup an application will actually have access to, and this is defined by the <em>XML Information Set (Infoset)</em>, the specification underlying many XML technologies.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XML Best Practices</title>
			<ul>
				<li><link href="goodxml">Good</link>: What you should do when using XML</li>
				<li><link href="badxml">Bad</link>: What you should not do when using XML</li>
				<li><link href="uglyxml">Ugly<sup>1</sup></link>: What you maybe have to do when using XML</li>
				<li><link href="infoset">Ugly<sup>2</sup></link>: XML's ugly little secret …</li>
			</ul>
		</slide>
		<part id="goodxml">
			<title short="Good XML">XML Best Practices</title>
			<slide>
				<title>Markup and Schemas</title>
				<ul>
					<li>XML can be encountered in different ways</li>
					<ol>
						<li>as somebody having to process XML documents</li>
						<li>as somebody having to understand XML documents</li>
						<li>as somebody having to generate XML documents</li>
						<li>as somebody having to design XML schemas</li>
					</ol>
				</ul>
			</slide>
			<part id="good-documents">
				<title>XML Documents</title>
				<slide>
					<title>Generating XML</title>
					<ul>
						<li>Character encoding</li>
						<ul>
							<li>use one of XML's standard encodings (UTF-8 or UTF-16)</li>
							<li>if you are using mostly Latin characters, UTF-8 is much more compact</li>
							<li>any other character encoding may cause interoperability issues</li>
						</ul>
						<li>Pretty-printing (adding line feeds and indentation)</li>
						<ul>
							<li>pretty-printed XML is easier to read for humans</li>
							<li>pretty-printed XML contains unnecessary whitespace</li>
							<li>pretty-printing is good for experiments and prototypes</li>
							<li>pretty-printing should be switched off for production systems</li>
						</ul>
					</ul>
				</slide>
				<slide id="xml-views">
					<title>XML Views</title>
					<ul>
						<li>Other people may use different tools</li>
						<ul>
							<li>XML is a character-based format, so every character counts</li>
							<li>other people may choose different technologies</li>
							<li>even your XML editor may choose to see things differently</li>
						</ul>
						<li>Many XML technologies use abstractions</li>
						<ul>
							<li>useful for concentrating on the <em>tree view</em></li>
							<li>no full control of markup usage (automatic serialization)</li>
							<li>think about working with a tree rather than working with a text file</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="good-dtd">
				<title>XML DTDs</title>
				<slide id="model-to-markup">
					<title>From Model to Markup</title>
					<ul>
						<li>There should be a conceptual model of the data</li>
						<ul>
							<li>formal conceptual models for XML are an active field of research</li>
							<li>informal models may use any notation</li>
						</ul>
						<li>Model design should omit questions of markup design</li>
						<ul>
							<li>element/attribute decisions are not a model question</li>
							<li>hierarchy/reference decisions are not a model question</li>
							<li>identifying the relevant entities and their relationships is a good idea</li>
						</ul>
						<li>Document engineering never invented modeling tools</li>
						<ul>
							<li>for document modelers, <q>the markup is the model</q></li>
							<li>there are no established notations for modeling documents</li>
							<li>document-type parts (e.g., mixed content) are hard to include in models</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>From Graphs to Trees</title>
					<ul>
						<li>In the model, <em>n:m</em> relationships may appear</li>
						<ul>
							<li>in an address database, an address should be reusable</li>
							<li>in a résumé, an organization's information should be reusable</li>
						</ul>
						<li>XML documents are trees</li>
						<ul>
							<li>all non-tree structures must be represented by tree structures</li>
							<li>in most cases, this will be done by introducing references</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>From Markup to Model</title>
					<ul>
						<li>Start with a sample instance</li>
						<ol>
							<li>start with a sample instance</li>
							<li>generate a schema for the instance with some tool</li>
							<li>open up the schema where necessary</li>
							<li>try creating more example instances <em>as different as possible/required</em></li>
							<li>write code for manipulating your test set of instances</li>
						</ol>
						<li>Restarting may be hard, but should be done</li>
						<ul>
							<li>view the initial design as a test bed, not as the <q>first version</q></li>
							<li>after you have learned some lessons, <em>throw everything away</em></li>
							<li>restart by designing everything from scratch</li>
							<li>content may be salvaged by writing small XSLT programs</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Top-Down or Bottom-Up?</title>
					<ul>
						<li>Both strategies have strengths and shortcomings</li>
						<ul>
							<li><em>top-down</em> tends to result in markup which looks <q>generated</q></li>
							<li><em>bottom-up</em> tends to result in markup which is less consistent</li>
						</ul>
						<li>Consistency is an important consideration</li>
						<ul>
							<li>if you dislike attributes, avoid them wherever possible</li>
							<li>if you like attributes, use them wherever possible</li>
							<li>don't mix these two styles of markup design</li>
						</ul>
					</ul>
				</slide>
				<slide id="reuse">
					<title>Reuse is Good</title>
					<ul>
						<li>Elements can be reused in different contexts</li>
						<ul>
							<li>elements then appear in the content model of more than one element</li>
							<li>an <code>address</code> may be used for <code>employee</code> as well as for <code>customer</code></li>
						</ul>
						<li>Content can be reused in different contexts</li>
						<ul>
							<li>(parts of) a content model may be useful in different contexts</li>
							<li>this only reuses an element's content, but not its name</li>
						</ul>
						<li>Attributes can be reused in different contexts</li>
						<ul>
							<li>technically, attributes are element-specific and have no relations when appearing on different elements</li>
							<li>when reusing attribute names, they should represent the same concept</li>
						</ul>
					</ul>
					<listing src="reuse.xml" line="3-16"/>
				</slide>
				<slide>
					<title>Reuse is Hard (in DTDs)</title>
					<ul>
						<li>Element reuse simply lists the element in more than one content model</li>
						<li>Content reuse requires parameter entities</li>
						<li>Attribute reuse requires parameter entities</li>
						<li>Nested parameter entities for multi-level reuse</li>
					</ul>
					<listing src="reuse.dtd"/>
				</slide>
			</part>
			<part>
				<title>General XML Issues</title>
				<slide id="element-vs-attribute">
					<title>Element vs. Attribute</title>
					<ul>
						<li>Elements and attributes are containers</li>
						<ul>
							<li>both contain character content</li>
						</ul>
						<li>Elements may carry attributes and may contain other elements</li>
						<ul>
							<li>for nested structures, elements must be chosen</li>
							<li>if the content needs to be annotated with an attribute, an element must be chosen</li>
							<li>if the item should be repeatable, an element must be chosen</li>
						</ul>
						<li>Attributes use less markup and have types</li>
						<ul>
							<li>if the content is (unstructured) <q>metadata</q>, an attribute may be a good choice</li>
							<li>for special types (ID/IDREF and enumerations), attributes are required</li>
							<li>if simple markup is an issue, attributes may be preferable</li>
						</ul>
						<li>Be consistent in your markup design style!</li>
					</ul>
				</slide>
				<slide>
					<title>Hierarchy vs. Reference</title>
					<ul>
						<li>Hierarchies are only possible with <em>1:n</em> relationships</li>
						<ul>
							<li>for <em>n:m</em> relationships, references are the only possible representation</li>
						</ul>
						<li>Containment should be represented as hierarchy</li>
						<ul>
							<li>containment limits the lifetime of the contained part to that of the container</li>
						</ul>
					</ul>
					<listing src="address-hierarchy.xml" line="2-11"/>
					<listing src="address-reference.xml" line="2-11"/>
				</slide>
				<slide id="granularity">
					<title>Granularity</title>
					<ul>
						<li>XML structures should identify the relevant information</li>
						<ul>
							<li>what exactly does <q>relevant</q> mean?</li>
							<li>very high granularity makes data acquisition hard</li>
							<li>very high granularity makes data processing easy</li>
						</ul>
						<li>Granularity is a general problem of data modeling</li>
						<ul>
							<li>XML is simply a syntax for representing structured data</li>
							<pre>&lt;phone>+1-510-6432253&lt;/phone></pre>
							<pre>&lt;phone cc="1" area="510" local="6432253"/></pre>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="badxml">
			<title>Bad XML</title>
			<slide>
				<title>Consistent Markup</title>
				<ul>
					<li>Decide on a strategy and stick to it</li>
					<li>Inconsistent markup is hard to work with</li>
					<li>Do not try to use markup itself for data representation</li>
					<ul>
						<li><q>attribute values in single quotes should be ignored</q></li>
						<li><q>empty elements using empty element tags have a special meaning</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Simple Markup</title>
				<ul>
					<li>XML can be read and edited by hand</li>
					<ul>
						<li>this depends on the application scenario and markup design</li>
						<li>human-accessible XML should be a markup design goal</li>
					</ul>
					<li>Tool requirements</li>
					<ul>
						<li>if your documents can only be used with tool xyz, something is wrong</li>
						<li>XML should be used for open data formats in open environments</li>
					</ul>
					<li>Undocumented side-effects</li>
					<ul>
						<li>data models may include more dependencies than encoded in the schema</li>
						<li>clearly document these side-effects so that users are warned</li>
						<li>if possible, document them in a machine readable way using <link href="schematron">a schema language</link></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="uglyxml">
			<title>Ugly XML</title>
			<slide id="redundant-data">
				<title>Redundant Data</title>
				<ul>
					<li>Redundant data is bad</li>
					<ul>
						<li>database design emphasizes <em>normalization</em> to eliminate redundant data</li>
						<li>normalization is difficult, creates complex structures, and makes data access slower</li>
						<li>real-life models and databases always contain redundancies</li>
					</ul>
					<li>Redundant data is used very frequently</li>
					<ul>
						<li>the <a href="http://zip4.usps.com/zip4/citytown_zip.jsp">ZIP code identifies state and city/cities</a></li>
						<li>very few address databases normalize street names (or numbers)</li>
					</ul>
					<li>Redundancy can be used for error detection/correction</li>
				</ul>
			</slide>
			<slide id="schema-redundancy">
				<title>Redundancy in the Schema</title>
				<ul>
					<li>Redundant data in schemas is very bad</li>
					<ul>
						<li>schema inspection cannot reveal the <q>same objective</q> behind the same markup</li>
						<li>further schema development will introduce inconsistencies</li>
					</ul>
					<li>Redundant data in schemas should be avoided</li>
					<ul>
						<li>schemas are a small and well-designed dataset</li>
						<li>schema design and maintenance are important issues</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Generically Generated Markup</title>
				<ul>
					<li>Some XML designers generate their schemas</li>
					<ul>
						<li>generated schemas are less likely to be well-designed</li>
						<li>the schema generation process may be poorly implemented</li>
					</ul>
					<li>Some schemas are based on a very generic markup</li>
					<ul>
						<li>the structure actually is in the content, not in the markup</li>
						<li>XML tools will not be very useful when working with these documents</li>
					</ul>
				</ul>
				<listing src="generic.xml" line="2-14"/>
			</slide>
		</part>
		<part id="infoset">
			<title short="Infoset">XML Information Set (XML Infoset)</title>
			<slide>
				<title>What is the Content of an XML Document?</title>
				<ul>
					<li>An interesting (and fruitless) discussion</li>
					<ul>
						<li>the content is whatever you consider it to be</li>
						<li>agreement between peers is necessary for data exchange</li>
						<li>agreement between specification writers and toolmakers is necessary to provide tools</li>
					</ul>
					<li>DOM and XSLT were two early arrivals</li>
					<ul>
						<li>both had an idea (and a model) of what the content of an XML document is</li>
						<li>they did not have the exact same idea</li>
					</ul>
					<li>Set a normative standard for an XML document's content</li>
					<ul>
						<li>the Infoset defines what is represented in the tree</li>
						<li>people should be confident to get this information when using XML technologies</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Infoset Example</title>
				<img src="infoset-example.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
			<slide id="not-infoset">
				<title>What is <u>Not</u> in the Infoset</title>
				<ul>
					<li>Do not rely on <a href="http://www.w3.org/TR/xml-infoset/#omitted">information not available in the Infoset</a></li>
					<ul>
						<li>order of attributes</li>
						<li>type of quotes around attribute values</li>
						<li>notation of empty elements (<code>&lt;elem>&lt;/elem></code> vs. <code>&lt;elem/></code>)</li>
						<li>how lines are terminated</li>
						<li>entities and character references</li>
					</ul>
					<li>XML contains all this information if used as XML document</li>
					<li>Many XML technologies are in fact Infoset technologies</li>
					<ul>
						<li>XSDL, XSLT, XQuery, SOAP, …</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML and Modeling</title>
				<ul>
					<li>XML is about representing structured data</li>
					<li>XML is a format for representing trees</li>
					<li>Data models often are not trees</li>
					<li>Mapping data models to trees can be done in many ways</li>
				</ul>
			</slide>
			<slide>
				<title>Assignment</title>
				<ul>
					<li><a href="a/2/">Assignment 2</a> is a simple modeling task</li>
					<ul>
						<li>we provide a sample instance and some requirements</li>
						<li>create an XML version of the sample instance</li>
						<li>create a DTD which is more versatile than just working for the sample instance</li>
					</ul>
				</ul>
			</slide>
		</part>		
	</presentation>
	<presentation id="xmlns">
		<title short="Namespaces">XML Namespaces</title>
		<date>2007-09-18</date>
		<toc class="resources"><a href="http://www.rpbourret.com/xml/NamespacesFAQ.htm#p1">XML Namespaces FAQ (Part I)</a>&#160;· <a href="http://www.w3.org/TR/REC-xml-names/" title="W3C XML Namespaces Specification">Spec</a></toc>
		<toc class="abstract">XML is successful because it can be used in many different scenarios, and because it is easy to define a schema (such as a DTD) for new scenarios, producing a tailored XML data model for this scenario. This means that names in XML documents must be interpreted as belonging to a certain schema. As long as a document uses names from only one schema, this can be done rather easily. However, in many scenarios today documents combine names from different schemas, and <em>XML Namespaces</em> provide a mechanism by which the names in an XML document can be associated with a namespace.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>How to think about Namespaces</title>
			<slide>
				<title>Namespaces are Simple</title>
				<ul>
					<li>XML Namespaces are often misunderstood</li>
					<ul>
						<li>the biggest problem is to get rid of some assumptions</li>
						<li>XML Namespaces are too simple and thus confusing</li>
					</ul>
					<li>Instincts of Web users</li>
					<ol>
						<li>URIs identify something that can be retrieved by a browser</li>
						<li>URIs identify something that can be displayed by a browser</li>
						<li>if I cannot get it and cannot look at it, what good can it be?</li>
					</ol>
					<li>However, these assumptions are not always true</li>
					<ol>
						<li>URIs identify <em>resources</em> which often, but not always, can be accessed over the Web</li>
						<li>URIs identify <em>resources</em> which often, but not always, have a Web-accessible representation</li>
						<li>sharing URIs means sharing an identity, which can mean sharing semantics (associated with this identity)</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Simple Examples</title>
				<listing src="mathml1.xml" line="2-6"/>
				<listing src="mathml2.xml" line="2-6"/>
				<listing src="mathml3.xml" line="2-6"/>
				<listing src="mathml4.xml" line="2-6"/>
			</slide>
			<slide>
				<title>Name Spaces</title>
				<ul>
					<li>Names are one form of identification</li>
					<li>Identification is essential for communications</li>
					<li>Names in XML are not suitable for identification</li>
					<ul>
						<li>they are local to their context (where they are defined)</li>
						<li>if the context is uniquely identified, the names would be, too</li>
					</ul>
					<li>Name Spaces: <em>Put names into spaces</em></li>
					<ul>
						<li>how to identify the space? Web things are identified by URIs</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>URI Philosophy</title>
				<ul>
					<li><link href="uri"/> uniquely identify resources</li>
					<li>URIs often provide access information</li>
					<ul>
						<li>pretty clear in <code>http://dret.net/lectures/xml-fall07/</code></li>
						<li>less clear in <code>urn:ietf:rfc:2648</code>  (<a href="http://dret.net/rfc-index/reference/RFC2648">RFC 2648</a>)</li>
						<li>very (and purposely) unclear in <code>tag:9327493874329</code>  (<a href="http://dret.net/rfc-index/reference/RFC4151">RFC 4151</a>)</li>
					</ul>
					<li>URIs often return <em>resource representations</em></li>
					<ul>
						<li>the resource itself is never returned (how to return a <em>lecture</em>?)</li>
						<li>some representation often is useful (HTML, PDF, maybe video/audio)</li>
						<li>the resource exists and is useful without a representation!</li>
					</ul>
					<li>URIs are much more than just addresses of HTML pages</li>
				</ul>
			</slide>
			<slide>
				<title>The Namespace Problem</title>
				<ul>
					<li>People assume that URIs point to Web pages</li>
					<ul>
						<li>a <em>namespace name</em> (a URI) may point to a Web page</li>
						<li>it may also have no Web page associated with it</li>
						<li>it may even use a URI scheme which cannot be retrieved</li>
						<li>but it is still possible to compare URIs!</li>
					</ul>
					<li>People assume some standardized content format</li>
					<ul>
						<li>friendly namespaces provide HTML portals (<a href="http://www.w3.org/1999/xhtml">XHTML</a>)</li>
						<li>some namespaces just give you the schema (<a href="http://www.w3.org/2001/12/soap-envelope">SOAP</a>)</li>
						<li>less friendly namespaces provide minimal information (<a href="http://www.w3.org/1999/XSL/Transform">XSLT</a>)</li>
						<li>very unfriendly namespaces may return a 404 or even use inaccessible schemes</li>
						<li>but they all are valid, because no resource representation is required!</li>
					</ul>
					<li>Namespaces are used by comparing URIs</li>
					<ul>
						<li>anything else may be useful, but is not strictly required</li>
						<li>when searching for a namespace definition, use Google (string search)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Using Namespaces</title>
			<slide>
				<title>Declaring Namespaces</title>
				<ul>
					<li>Using a namespace means referencing names from it</li>
					<ul>
						<li>unfortunately, there is no really standard way of writing these names</li>
						<li>(the <q><a href="http://www.jclark.com/xml/xmlns.htm">Clark notation</a></q> is useful: <code>{http://www.w3.org/1999/xhtml}html</code>)</li>
						<li>Namespaces are declared and then used</li>
					</ul>
					<li><xml>xmlns</xml>-prefixed attributes are used for declaring namespaces</li>
					<ul>
						<li>Default: <elem>html xmlns="http://www.w3.org/1999/xhtml"</elem></li>
						<li>Prefix: <elem>xhtml:html xmlns:xhtml="http://www.w3.org/1999/xhtml"</elem></li>
					</ul>
					<li>Namespace declarations are inherited and can be overwritten</li>
					<ul>
						<li>the default namespace can be undeclared</li>
						<li>Namespace declarations can be used in a myriad of ways</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Unhealthy Namespace Usages</title>
				<ul>
					<li>Namespaces can be (and are) used in very weird ways</li>
					<ul>
						<li>these are syntax variations of identical structures</li>
						<li>without a good (i.e., conforming) parser, interpretation is very hard</li>
						<li>copy/paste can become hard or impossible</li>
					</ul>
					<li>Namespaces can be <a href="http://lists.xml.org/archives/xml-dev/200204/msg00170.html">neurotic, psychotic, borderline, or normal</a></li>
					<li>Each of the insane cases complicates processing</li>
					<li>None of these has any real technical inaccuracies</li>
					<li>XML should be used with humans in mind</li>
				</ul>
			</slide>
			<slide>
				<title>Unhealthy Namespace Usages in Practice</title>
				<listing src="neurotic.xml" line="2-9"/>
				<listing src="borderline.xml" line="2-9"/>
				<listing src="psychotic.xml" line="2-9"/>
			</slide>
			<slide>
				<title>Elements and Attributes</title>
				<ul>
					<li>Namespaces often apply to elements and attributes</li>
					<ul>
						<li>if an element name has no prefix, it has no namespace or the default namespace associated</li>
						<li>if a name has a prefix, the prefix must be bound to a namespace name</li>
						<li>names like this are called <em>Qualified Names (QNames)</em></li>
					</ul>
					<li>Elements and Attributes are treated differently</li>
					<ul>
						<li>the default namespace only applies to unprefixed element names</li>
						<li>unprefixed attribute names are in no namespace</li>
						<li><link href="xsdl-1">XSDL</link> deals with this by <link href="xsdl-names">keeping attributes <q>local</q></link></li>
					</ul>
					<li>Applications should interpret QNames</li>
					<ul>
						<li>naïve implementations will break when processing unhealthy instances</li>
						<li>the mechanics of implementing namespaces are not very hard</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Other Usages</title>
				<ul>
					<li>Increasingly, QNames are used in content</li>
					<ul>
						<li><link href="xslt-1">XSLT</link> was the first specification using this</li>
						<li>many other technologies have followed</li>
					</ul>
				</ul>
				<pre><![CDATA[<xsl:template match="section" xmlns:mathml="http://www.w3.org/1998/Math/MathML">
<xsl:if test="exists(.//mathml:*)">]]></pre>
				<ul>
					<li>Technically, everything is well-defined</li>
					<ul>
						<li>for processing, the namespace bindings must be known</li>
						<li>copy/paste on a textual basis may not work or even work wrong</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Defining Namespaces</title>
			<slide>
				<title>Any URI is Possible</title>
				<ul>
					<li>A namespace name is a URI, that's all!</li>
					<ul>
						<li>it may not be accessible (because of the URI scheme)</li>
						<li>when retrieving it, nothing may be returned</li>
						<li>when retrieving it, something may be returned</li>
					</ul>
					<li>The only important thing is <em>the name</em></li>
					<ul>
						<li>the name is mentioned in the documentation</li>
						<li>if you know the documentation, you know the name</li>
						<li>shared names mean shared knowledge</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespace Definitions</title>
				<ul>
					<li>Namespaces can be defined by a DTD (<a href="http://www.w3.org/TR/xhtml1/#strict">XHTML</a>)</li>
					<li>Namespaces can be defined by XSDL (<a href="http://www.w3.org/TR/soap12-part1/#tabnsprefixes">SOAP</a>)</li>
					<li>Namespaces can be defined by RELAX NG (<a href="http://www.w3.org/TR/xhtml2/conformance.html#strict">XHTML 2.0</a>)</li>
					<li>Namespaces can be defined by prose (<a href="http://www.w3.org/TR/xslt#xslt-namespace">XSLT</a>)</li>
					<li>If schemas are provided, additional information is required</li>
					<ul>
						<li>it is unlikely that a namespace can be fully described by a schema</li>
						<li>additional constraints and semantics are specified in prose</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Structured Namespaces</title>
				<ul>
					<li>Namespaces have no structure</li>
					<ul>
						<li>a collection of names grouped by their namespace name</li>
						<li>inside the namespace, names have local meaning</li>
					</ul>
					<li>Namespace definitions are free to make up their own rules</li>
					<ul>
						<li>but then they must also make rules how to deal with conflicts</li>
					</ul>
					<li>XSDL <a href="http://www.w3.org/TR/xmlschema-1/#concepts-nameSymbolSpaces">structures the namespace defined by a schema</a></li>
					<ul>
						<li>the different <q>parts</q> of the namespace are called <em>symbol spaces</em></li>
						<li>all XSDL components have their own symbol space</li>
						<li><em>simple</em> and <em>complex types</em> share the same symbol space</li>
						<li>locally defined elements/attributes are in <q>sub symbol spaces</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Fixed or Extensible?</title>
				<ul>
					<li>Can a namespace change over time?</li>
					<ul>
						<li>may the namespace description become outdated? extended? replaced?</li>
						<li>this should be clearly documented in the namespace description</li>
					</ul>
					<li>The XML Namespace was widely believed <a href="http://www.w3.org/XML/1998/namespace">to be defined by XML</a></li>
					<ul>
						<li><xml>xml:lang</xml> and <xml>xml:space</xml> defined by XML</li>
						<li><xml>xml:base</xml> was added by <em>XML Base</em></li>
						<li><xml>xml:id</xml> was added by <em>xml:id</em></li>
					</ul>
					<li>When defining namespaces, plan ahead and publish everything</li>
					<ul>
						<li>dependencies, change management, and versioning issues are important</li>
						<li>there still is no accepted standard for namespace descriptions</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespace Descriptions</title>
				<img style="width : 90% ; margin : 2% ; " src="ns-description.png"/>
				<p class="quotenote"><a href="http://dret.net/netdret/publications#wil06h">Erik Wilde, <q>Structuring Namespace Descriptions</q>, 15th International World Wide Web Conference (WWW2006), Edinburgh, UK, May 2006.</a></p>
			</slide>
		</part>
		<part>
			<title>Processing Namespaces</title>
			<slide id="namespace-validity">
				<title>Namespaces and Validity</title>
				<ul>
					<li>Namespaces define an additional layer on top of XML</li>
					<ul>
						<li>they define additional semantics (assignment to namespaces)</li>
						<li>they define additional constraints (declaration and usage of namespaces)</li>
					</ul>
					<li>Namespace-awareness is a basic requirement for XML tools</li>
					<ul>
						<li>XML not compliant with XML Namespaces will break most tools</li>
						<li>processing namespaces should be done by tools</li>
						<li>a namespace-aware parser translates namespace declarations into nodes</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespaces in the Document</title>
				<listing src="mathml4.xml"/>
			</slide>
			<slide>
				<title>Namespaces in the Tree</title>
				<img src="xmlns-tree.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Name Spaces</title>
				<ul>
					<li><q>Bags of Names</q> with a URI as a label</li>
					<li>The URI does not necessarily return anything</li>
					<li>Namespaces can be defined in any way (e.g., schemas)</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xpath">
		<title short="XPath">XML Path Language (XPath)</title>
		<date>2007-09-20</date>
		<toc class="resources"><a href="xpath-chapter.pdf">XPath Chapter</a>&#160;· <a href="xpath-quickref.pdf">XPath QuickRef</a></toc>
		<toc class="abstract">XML structures data into a rather small number of different constructs, most notably elements and attributes. The <em>XML Path Language (XPath)</em> defines a way how to select parts of XML documents, so that they can be used for further processing. XPath's primary use is in <em>XSL Transformations (XSLT)</em>, but other XML technologies use it as well, e.g. XSDL. XPath is a very compact language with a syntax that resembles path expressions well-known from file systems. These path expressions, however, are generalized and therefore much more powerful than the rather simple path expressions in file systems. Because of its use in different XML technologies, XPath is one of the most important XML core technologies.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Why XPath?</title>
			<slide>
				<title>Selecting Parts of XML Documents</title>
				<ul>
					<li>XML is a syntax for trees</li>
					<ul>
						<li>it defines a way for how trees can be exchanged</li>
					</ul>
					<li>XML technologies should provide support for working with trees</li>
					<ul>
						<li>when receiving trees, access to the tree should be easy (DOM)</li>
						<li>validating trees should be easy (<link href="xsdl-1">XSDL</link>)</li>
						<li>mapping trees should be easy (<link href="xslt-1">XSLT</link>)</li>
						<li>querying tree collections should be easy (<link href="xquery-1">XQuery</link>)</li>
						<li>XPath is for trees what regular expressions are for text-based information</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Making Selection Reusable</title>
				<ul>
					<li>Different XML technologies need selection</li>
					<ul>
						<li><link href="xslt-1">XSLT</link> needs it for selecting parts and manipulating them</li>
						<li><link href="xsdl-1">XSDL</link> needs it for applying identity constraints</li>
						<li>DOM needs it for extracting parts from an XML tree</li>
						<li>XQuery needs it for writing XML-oriented queries</li>
					</ul>
					<li>XPath was created to be reusable</li>
					<ul>
						<li>XML experts should only learn one selection language</li>
						<li>this knowledge can be reused when learning new technologies</li>
						<li>implementations can reuse code libraries</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>How XPath Evolved</title>
				<ul>
					<li>XSL was designed as the new XML stylesheet language</li>
					<ol>
						<li><link href="xslt-1">XSL Transformations (XSLT)</link> transform the input document</li>
						<li><em>XSL Formatting Objects (XSL-FO)</em> is what they will transform it to</li>
					</ol>
					<li>XSLT was designed to work on arbitrary XML input documents</li>
					<ul>
						<li>started as a part of XSL (<a href="http://www.w3.org/TR/1998/WD-xsl-19981216">WD-xsl-19981216</a> → <a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a>)</li>
						<li>for selecting parts of the transformation input, a selection mechanism had to be provided</li>
					</ul>
					<li>XPath was turned into a standalone specification</li>
					<ul>
						<li>started as a part of XSLT (<a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a> → <a href="http://www.w3.org/1999/07/WD-xslt-19990709">WD-xslt-19990709</a>)</li>
						<li>reused in a number of other W3C specifications (XSDL, DOM)</li>
					</ul>
					<li>Complete overhaul for XSLT 2.0 and XQuery</li>
					<ul>
						<li><a href="http://www.w3.org/TR/xpath20/">XPath 2.0</a> as the core language</li>
						<li>a much larger set of <a href="http://www.w3.org/TR/xpath-functions/">functions and operators</a></li>
						<li>the underlying <a href="http://www.w3.org/TR/xpath-datamodel/">data model</a> which describes the foundation</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>How XPath Works</title>
			<part id="xpath-tree">
				<title>The XPath Tree Model</title>
				<slide>
					<title>Starting from the Infoset</title>
					<ul>
						<li>XPath operates on an abstract data model</li>
						<ul>
							<li>a tree derived from the <link href="infoset"/></li>
							<li>a simplification (another one!) of the underlying XML</li>
						</ul>
						<li>The Infoset is turned into an <em>XPath node tree</em></li>
						<ul>
							<li>11 infoset item types → 7 XPath node tree node types</li>
							<li>character items are merged into text nodes</li>
							<li>namespace declarations are no longer visible as attributes</li>
						</ul>
					</ul>
				</slide>
				<slide id="not-xpath">
					<title>What is <u>Not</u> in the XPath Tree</title>
					<ul>
						<li>The same things which are <link href="not-infoset">not in the Infoset</link></li>
						<ul>
							<li>the order of attributes in a start tag</li>
							<li>the types of quotes around attribute values</li>
							<li>character references and entities (<code>&amp;#xFC;</code>/<code>&amp;uuml;</code> → <code>ü</code>)</li>
						</ul>
						<li>And some more …</li>
						<ul>
							<li>namespace declarations are no longer visible as attributes</li>
							<li>notations and unexpanded entity references</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XPath Evaluation</title>
				<slide>
					<title>Tree In / Selection Out</title>
					<ul>
						<li>XPath evaluates an expression based on a tree</li>
						<li>Where the tree comes from is out of XPath's scope</li>
						<li>The result of the evaluation is a selection</li>
						<ul>
							<li><code>//img[not(@alt)]</code> → select all images which have no <code>alt</code> attribute</li>
							<li><code>count(//img)</code> → return the number of images</li>
							<li><code>/descendant::img[3]/@src</code> → return the third image's <code>src</code> URI</li>
							<li><code>starts-with(/html/@lang, 'en')</code> → test whether the document's language is English</li>
						</ul>
						<li>Syntax errors may occur</li>
						</ul>
				</slide>
			</part>
		</part>
		<part>
			<title short="Location Paths">XPath Location Paths</title>
			<slide>
				<title>Location Path Structure</title>
				<ul>
					<li>Each location path consists of <em>Location Steps</em></li>
					<ul>
						<li>location steps are separated by <q><code>/</code></q>, like path names in file systems</li>
					</ul>
					<li>Similarities between XPath location paths and file systems</li>
					<ol>
						<li>nodes in the <link href="xpath-tree">XPath tree</link> have different types</li>
						<li>the <link href="xpath-nodetest">type and number of nodes selected by one step</link></li>
						<li>the <link href="xpath-axes">direction in which each step moves</link></li>
						<li>additional <link href="xpath-predicates">filters for selecting specific nodes</link></li>
					</ol>
					<li>Differences between XPath location paths and file systems</li>
					<ol>
						<li>XPaths may return <link href="xpath-expressions">other data types than nodes</link></li>
						<li>XPath provides a <link href="xpath-functions">built-in function library</link></li>
					</ol>
				</ul>
			</slide>
			<part>
				<title short="Node Tests">XPath Node Tests</title>
				<slide>
					<title>File System vs. XPath Paths</title>
					<table style="margin : 5% ; " width="85%">
						<tr>
							<th>File System Path:</th>
							<td align="center"><code>/</code></td>
							<td align="center"><code>usr</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>local</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>apache</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>bin</code></td>
							<td align="center"><code>/</code></td>
						</tr>
						<tr>
							<th># Selected Nodes:</th>
							<td align="center">1</td>
							<td align="center">→ 1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">1</td>
						</tr>
					</table>
					<table style="margin : 5% ; " width="85%">
						<tr>
							<th>XPath:</th>
							<td align="center"><code>/</code></td>
							<td align="center"><code>html</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>body</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>table</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>thead</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>tr</code></td>
						</tr>
						<tr>
							<th># Selected Nodes:</th>
							<td align="center">1</td>
							<td align="center">→ 1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">6</td>
							<td align="center">→</td>
							<td align="center">4</td>
							<td align="center">→</td>
							<td align="center">12</td>
						</tr>
					</table>
				</slide>
				<slide id="xpath-nodetest">
					<title>Tests for Nodes</title>
					<ul>
						<li>Name tests</li>
						<ul>
							<li>testing for a particular name (elements/attributes): <code>/html/head/title</code></li>
							<li>wildcards (testing for any name): <code>/html/head/*</code></li>
						</ul>
						<li>Node type tests</li>
						<ul>
							<li>text nodes: <code>text()</code></li>
							<li>comment nodes: <code>comment()</code></li>
							<li>any nodes: <code>node()</code></li>
						</ul>
						<li>Processing instruction tests</li>
						<ul>
							<li>any PI: <code>processing-instruction()</code></li>
							<li>specific PI: <code>processing-instruction("xml-stylesheet")</code></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xpath-axes">
				<title short="Axes">XPath Axes</title>
				<slide>
					<title>Where Do You Want to Go Today?</title>
					<ul>
						<li>File system paths are one direction only</li>
						<ul>
							<li>always one level down in the file system hierarchy</li>
							<li><code>.</code> and <code>..</code> are clever directory shortcuts</li>
							<li>other directions supported by tools (e.g., <code>find</code>)</li>
						</ul>
						<li>XPath allows steps in different directions</li>
						<ul>
							<li>the default direction is <code>child</code></li>
							<li>other directions are explicitly specified: <code>descendant::a</code></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Axis Peculiarities</title>
					<ul>
						<li>Attributes and Namespaces are <u>not</u> the children of elements, but …</li>
						<li>… elements are their attributes' parent!</li>
						<ul>
							<li>very counter-intuitive</li>
							<li>very convenient</li>
						</ul>
						<li>Attributes and Namespaces are always leaves in the node tree</li>
						<li>Attribute nodes <u>have</u> the attribute value as their value</li>
						<li>Namespace nodes <u>have</u> the namespace name (i.e., a URI) as their value</li>
						<li>Namespace nodes exist because of namespace declarations</li>
						<ul>
							<li>in the XPath node tree, only the namespace nodes are visible</li>
							<li>the namespace declaration attributes (<code>xmlns</code>) are invisible</li>
							<li>one namespace declaration potentially creates many namespace nodes</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Axes</title>
					<img style="height : 75% ; margin : 2% ; " src="xpath-axes.png" title="XPath Axes"/>
				</slide>
				<slide>
					<title>Putting it all Together</title>
					<ul>
						<li>XPath location paths use a simple syntax</li>
						<ul>
							<li>sequence of location steps, separated by <q><code>/</code></q></li>
						</ul>
						<li>Each location step uses a simple structure (<code>preceding::p[@class="warning"]</code>)</li>
						<ol>
							<li>an axis followed by <q><code>::</code></q> (no axis uses the default axis <code>child</code>)</li>
							<li>a <link href="xpath-nodetest">node test</link></li>
							<li><em>0-n</em> <link href="xpath-predicates"/> enclosed in <q><code>[]</code></q></li>
						</ol>
						<li>Location paths can be abbreviated</li>
						<ul>
							<li><code>child::</code> can be omitted (default axis)</li>
							<li><code>attribute::</code> can be written as <q><code>@</code></q></li>
							<li><q><code>.</code></q> is an abbreviation for <code>self::node()</code></li>
							<li><q><code>..</code></q> is an abbreviation for <code>parent::node()</code></li>
							<li><q><code>//</code></q> is an abbreviation for <code>/descendant-or-self::node()/</code></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xpath-predicates">
				<title>Predicates</title>
				<slide>
					<title>Location Step Filters</title>
					<ul>
						<li>Predicates are filters for each location step</li>
						<ul>
							<li>there can be any number of filters (<em>0-n</em>)</li>
							<li>each filter is applied to each selected node individually</li>
						</ul>
						<li>Each predicate is an XPath and is evaluated as a boolean</li>
						<ul>
							<li>the context of this evaluation is the node for which the filter is evaluated</li>
							<li>if the result is a number, it is compared with the <code>position()</code> function (<code>/descendant::a[5]</code>)</li>
						</ul>
						<li>Predicates always reduce the set of selected nodes</li>
						<ul>
							<li>as corner cases, the set of selected nodes does not change or is empty</li>
							<li>predicates are used in the majority of non-trivial XPath location paths</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Location Path Processing</title>
					<ul>
						<li>Location paths are processed in a very simple way</li>
						<ol>
							<li>start with a given context</li>
							<li>for each location step, repeat the following steps:</li>
							<li>based on the context and the axis, select the nodes on this axis</li>
							<li>reduce this selection to the nodes identified by the node test</li>
							<li>sequentially apply all filters to each of these nodes</li>
							<li>take the remaining node set as the context for the next location step</li>
						</ol>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xpath-expressions">
			<title>XPath Expressions</title>
			<slide>
				<title>Beyond Location Paths</title>
				<ul>
					<li>XPath is a full expression language</li>
					<ul>
						<li>any evaluated expression in XSLT is an XPath</li>
						<li>XPath must be able to calculate with and operate on non-XML data types</li>
					</ul>
					<li>XPath uses a very simple data model</li>
					<ol>
						<li>node sets: <code>//img[not(@alt)]</code></li>
						<li>number: <code>count(//img)</code></li>
						<li>string: <code>/descendant::img[3]/@src</code></li>
						<li>boolean: <code>starts-with(/html/@lang, 'en')</code></li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>XPath Usages</title>
				<ul>
					<li>XPath is used in different technologies</li>
					<ul>
						<li>XSLT uses XPath as its expression language</li>
						<li>XSDL uses XPath for selecting identity constraint nodes</li>
						<li>DOM uses XPath as a way to select DOM nodes</li>
					</ul>
					<li>Depending on the environment, expressions must yield certain results</li>
					<ul>
						<li>for conditionals, a boolean must be returned</li>
						<li>iterations (in XSLT) only loop over nodes</li>
						<li>when printing out text, a string must be produced</li>
					</ul>
					<li>XPath has built-in rules for casting types</li>
					<ul>
						<li>node set → boolean: empty is false, non-empty is true</li>
						<li>node → string: take the <em>string value</em> (i.e., concatenate all text node descendants)</li>
						<li>string → number: interpret as decimal notation (otherwise return <q><code>NaN</code></q>)</li>
						<li>XPaths often return surprising results (<code>//a[starts-with(@href, https)]</code>)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xpath-functions">
			<title>XPath Functions</title>
			<slide>
				<title>Function Library</title>
				<ul>
					<li>XPath has a small library of built-in functions</li>
					<ul>
						<li>useful for basic XPath-level functions</li>
						<li>other specs are allowed to extend it (XSLT does it)</li>
					</ul>
					<li>XPath functions return results of various data types</li>
					<ul>
						<li>boolean: <code>boolean, contains, false, lang, not, starts-with, true</code></li>
						<li>number: <code>ceiling, count, floor, last, number, position, round, string-length, sum</code></li>
						<li>string: <code>concat, local-name, name, namespace-uri, normalize-space, string, substring, substring-after, substring-before, translate</code></li>
						<li>node set: <code>id</code></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Using Functions</title>
				<ul>
					<li>Functions and location paths are orthogonal</li>
					<ul>
						<li>each construct may be based on the other</li>
						<li>it is possible to nest them arbitrarily</li>
						<li>predicates often contain functions</li>
						<pre>//a[substring(@href,string-length(@href)-2)='pdf']</pre>
					</ul>
					<li>XPaths can become powerful and complex</li>
					<ul>
						<li>writing some code or thinking about an XPath?</li>
						<li>XPaths are more declarative</li>
						<li>they may be more robust against changes in the XML schema</li>
						<li>they can be optimized by a smart XPath implementation</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Limitations of XPath</title>
			<slide>
				<title>XPath Selects</title>
				<ul>
					<li>Query languages select and recombine</li>
					<ol>
						<li>look up all addresses by zip code</li>
						<li>for each zip code, count the number of addresses</li>
					</ol>
					<li>XSLT fills in the missing parts (as a programming language)</li>
					<ul>
						<li>XSLT can construct XML and re-apply XPath</li>
					</ul>
					<li>XQuery fills in the missing parts (query-wise)</li>
					<ul>
						<li>80% of XQuery is XPath (in version 2.0, though)</li>
						<li>the remaining 20% are bindings, constructors, and glue</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XPath is Important</title>
				<ul>
					<li>XPath is a basic tool of the XML toolbox</li>
					<li>XPath is reused in various XML technologies</li>
					<li>XPath selects parts of an XML document</li>
					<li>XPath can do more general things by using expressions</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt-1">
		<title short="XSLT 1">XML Transformations (XSLT) – Part I</title>
		<date>2007-09-25</date>
		<toc class="resources"/>
		<toc class="abstract">Because XML can be used to represent any vocabulary (often defined by some schema), the question is how these different vocabularies can be processed and maybe transformed into something else. This <q>something else</q> may be another XML vocabulary (a common requirement in B2B scenarios), or it may be HTML (a common scenario for Web publishing). Using <em>XSL Transformations (XSLT)</em>, mapping tasks can be implemented easily. XSLT leverages XPath's expressive power in a rather simple programming language, the programs are often called <em>stylesheets</em>. For easy tasks, XSLT mappings can be specified without much real <q>programming</q> going on, by simply specifying how components of the source markup are mapped to components of the target markup.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XPath and XSLT</title>
			<ul>
				<li>XPath is an expression language</li>
				<ul>
					<li>location paths let you select parts of an XML document tree</li>
					<li>expressions in general may have other data types as well (string, number, boolean)</li>
				</ul>
				<li>XSLT is a programming language based on XPath</li>
				<ul>
					<li>XSLT defines the structures for the control flow within the program</li>
					<li>in all the places where something is evaluated, XPaths are being used</li>
					<li>sometimes, one can substitute for the other</li>
				</ul>
			</ul>
			<listing src="xslt-vs-xpath.xsl" line="5-13"/>
		</slide>
		<slide>
			<title>XSLT Syntax</title>
			<img src="xml-technology-syntaxes.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<slide>
			<title>XSLT Executive Summary</title>
			<ul>
				<li>XSLT is an XML-oriented programming language</li>
				<li>XSLT uses XML as its syntax</li>
				<li>XSLT is a weakly typed language</li>
				<li>XSLT is not designed for large programming tasks</li>
				<li>XSLT is the standard language for XML-to-XML transformations</li>
				<li>XSLT is very simple and often too simple</li>
				<li><link href="xslt20-1">XSLT 2.0</link> is much more complex and powerful</li>
			</ul>
		</slide>
		<slide>
			<title>XSLT as a Programming Language</title>
			<ul>
				<li>XSLT is a functional programming language</li>
				<ul>
					<li>fundamentally different from the usual languages</li>
					<li>not important for very simple mapping applications</li>
					<li>important for writing more complex transformations</li>
					<li>hard to get used to for procedurally trained people</li>
				</ul>
				<li>XSLT has built-in behavior for tree traversal</li>
				<ul>
					<li>XPath allows you to select parts of the document tree</li>
					<li>XSLT's default behavior is to traverse the complete tree</li>
					<li>the idea of <q>default behavior</q> may seem strange</li>
				</ul>
			</ul>
		</slide>
		<part id="xslt-examples">
			<title>Simple Examples</title>
			<slide>
				<title>My First XSLT</title>
				<ul>
					<li>XSLT uses a simple environment</li>
					<ul>
						<li>all you need is an <em>XSLT processor</em> (<a href="http://www.saxonica.com/">Saxon</a> recommended)</li>
					</ul>
					<li>Some interesting observations</li>
					<ul>
						<li>it is an XML document (using the <a href="http://www.w3.org/TR/xslt#xslt-namespace">XSLT Namespace</a>)</li>
						<li>it contains no visible code (no statements)</li>
						<li>when being applied (i.e., executed), it produces a result</li>
					</ul>
				</ul>
				<listing src="first.xsl"/>
			</slide>
			<slide>
				<title>Why does it Work?</title>
				<ul>
					<li>The <q>text</q> of the document is produced</li>
					<ul>
						<li>technically, it is the concatenation of all text nodes</li>
						<li>this works with all XML input documents</li>
					</ul>
					<li>XSLT by default traverses the document tree</li>
					<ul>
						<li>it copies all text nodes</li>
						<li>it works its way through the document recursively</li>
						<li>this behavior is unusual for a programming language</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Second XSLT</title>
				<listing src="second.xsl"/>
			</slide>
			<slide>
				<title>How does it Work?</title>
				<ul>
					<li>Text output rather than XML output</li>
					<li>Overriding the default behavior</li>
					<ul>
						<li>new rules for how to recurse through the document tree</li>
						<li>the rules are <q>applied</q> <em>by the XSLT processor</em></li>
						<li>the execution of the XSLT code is controlled <em>by the XSLT processor</em></li>
					</ul>
					<li>Traversing the document tree in XSLT is easy</li>
					<ul>
						<li>this is what XSLT has been designed for</li>
						<li>trying to avoid this pattern leads to bad code and bad results</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Third XSLT</title>
				<listing src="third.xsl" line="3-21"/>
			</slide>
			<slide>
				<title>How Mappings Work</title>
				<ul>
					<li>All non-XSLT elements are <em>literal result elements</em></li>
					<ul>
						<li>their content is processed as usual</li>
						<li>they may contain XSLT or literal result elements</li>
					</ul>
					<li>XSLT elements in the stylesheet are instructions</li>
					<ul>
						<li>they are executed and have some predefined behavior</li>
						<li>if they produce results, these go to the result tree as well</li>
					</ul>
					<li>One-template XSLT is a good way to start with XSLT</li>
					<ul>
						<li>avoiding the learning curve associated with <link href="xslt-templates"/></li>
						<li>for easy mapping tasks, this pattern often is sufficient</li>
						<li>for complex tasks, this is the XSLT equivalent of <q>spaghetti code</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title><q>Hello World</q> in XSLT</title>
				<ul>
					<li>XSLT always transforms an XML document</li>
					<ul>
						<li>this is hard-coded in the <link href="xslt-processing-model"/></li>
					</ul>
					<li>Simply generating output is impossible</li>
					<ul>
						<li><q>hello world</q> therefore ignores the input</li>
						<li>anything can be the input (including the XSLT itself)</li>
					</ul>
				</ul>
				<listing src="helloworld.xsl"/>
			</slide>
		</part>
		<part>
			<title>XSLT Instructions</title>
			<slide>
				<title>XSLT is RISC</title>
				<ul>
					<li>XSLT has a <a href="http://www.w3.org/TR/xslt#element-syntax-summary">small set of instructions</a></li>
					<ul>
						<li>the language was designed to run in a restricted environment</li>
						<li>the language was designed for a specific task</li>
						<li>much of the language's power lies in XPath</li>
					</ul>
					<li>XPath is the CISC part of XSLT</li>
					<ul>
						<li>XPath is a complex high-level language</li>
						<li>it is specialized for the task the language is designed to do</li>
						<li>it can be highly optimized</li>
						<li>writing the XPaths often is the most challenging part of XSLT</li>
					</ul>
					<li>Starting with XSLT should improve simple mappings</li>
				</ul>
			</slide>
			<slide id="xslt-iterations">
				<title>Iterations</title>
				<ul>
					<li>XSLT can only iterate over node sets</li>
					<ul>
						<li>any other problem has to be solved recursively</li>
						<li>iterating over node sets often is what you want to do</li>
					</ul>
					<li>Applying the same code to all of the nodes</li>
					<ul>
						<li>works great if all nodes require the same processing</li>
						<li>is of limited use when processing needs to be conditional</li>
					</ul>
				</ul>
				<listing src="third.xsl" line="11-17"/>
			</slide>
			<slide id="xslt-conditionals">
				<title>Conditional Instructions</title>
				<ul>
					<li>Programming languages usually provide if-then-else</li>
					<ul>
						<li>XSLT has an if-then: <xslte>if</xslte></li>
						<li>and an if-then-(elif-then)*-else: <xslte>choose</xslte></li>
					</ul>
					<li>Simple handling of special cases</li>
					<ul>
						<li>having few and reasonably sized conditionals is ok</li>
						<li>having deeply nested and very long conditionals is a problem</li>
						<li>as in all programming languages, the latter case should use other mechanisms</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Third XSLT (II)</title>
				<listing src="third-if.xsl" line="10-22"/>
			</slide>
			<slide>
				<title>My Third XSLT (III)</title>
				<listing src="third-choose.xsl" line="20-38"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSLT is Simple</title>
				<ul>
					<li>XSLT is a simple programming language</li>
					<li>XSLT's processing model is useful but unusual</li>
					<li>XPath competence is essential for XSLT</li>
					<li>Programming requires practice</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt-2">
		<title short="XSLT 2">XML Transformations (XSLT) – Part II</title>
		<date>2007-09-27</date>
		<toc class="resources"/>
		<toc class="abstract">XSLT processes documents by matching nodes in the document tree to <em>templates</em>, which then are executed to process these nodes. This process of matching and executing templates is the core of XSLT's processing model. XSLT has built-in templates which complement the user-supplied templates, so that the XSLT processor always finds a template to execute. Templates can conflict, and it is then necessary to resolve this conflict by finding the <q>best match</q> of all matching templates. This <em>conflict resolution</em> process also is a very important component of the XSLT processing model.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XSLT Programming</title>
			<ul>
				<li>Stylesheets (XSLT programs) are a set of <em>templates</em></li>
				<li>Simple mappings can be defined in one template</li>
				<ul>
					<li>the template creates the result document's structure</li>
					<li><link href="xslt-iterations"/> and <link href="xslt-conditionals"/> provide some flexibility for processing</li>
					<li>the resulting code is always <q>spaghetti code</q></li>
				</ul>
				<li>Non-trivial XSLT programs use more than one template</li>
				<ul>
					<li>different templates are responsible for mapping subtrees of the input document</li>
					<li>the whole process is <em>driven by the document</em></li>
					<li>XSLT programming needs some time to get used to</li>
				</ul>
				<li>Like every tool, XSLT can be misused</li>
				<ul>
					<li>for simple problems, XSLT can be used like a regular programming language</li>
					<li>for harder problems, this is impossible (missing language constructs)</li>
				</ul>
			</ul>
		</slide>
		<part id="xslt-processing-model">
			<title>XSLT Processing Model</title>
			<slide>
				<title>Input and Output</title>
				<img style="width : 90% ; margin : 2% ; " src="xslt-model.png"/>
			</slide>
		</part>
		<part id="xslt-templates">
			<title>Templates</title>
			<slide>
				<title>Templates as Building Blocks</title>
				<ul>
					<li>Templates are the main unit of code</li>
					<ul>
						<li>the <xslta>match</xslta> attribute defines which nodes are processed by a template</li>
						<li>whenever such a node needs to be processed, the template is executed (<q>applied</q>)</li>
						<li>XPaths are interpreted with the matched node as context</li>
					</ul>
					<li>Templates contain a mix of <link href="xslt-literal"/> and XSLT code</li>
					<ul>
						<li><link href="xslt-literal"/> and text nodes are copied to the result tree</li>
						<li>XSLT elements are executed (depending on their semantics)</li>
						<li><xslte>apply-templates</xslte> plays a special role because it selects nodes to be processed</li>
					</ul>
					<li>The template application process is special</li>
					<ul>
						<li>probably the most challenging aspect when learning the language</li>
						<li>XSLT is much easier to use when understanding the underlying principle</li>
					</ul>
				</ul>
			</slide>
			<slide id="xslt-algorithm">
				<title>Basic Mechanics</title>
				<ol>
					<li>The <em>source node list</em> contains only the root node</li>
					<li>The result tree is created by inserting the result from processing a node from the source node list</li>
					<li>Processing typically puts more nodes on the source node list</li>
					<li>The process is repeated until the source node list is empty</li>
				</ol>
				<listing src="second.xsl" line="5-15"/>
			</slide>
			<slide>
				<title>Template Selection</title>
				<ul>
					<li>Templates are connected through two statements</li>
					<ul>
						<li><xslte>apply-templates</xslte> selects which nodes are put on the source node list</li>
						<li>the XSLT processor selects the best <xslte>template</xslte> and executes it</li>
					</ul>
					<li>What happens if there is no template?</li>
					<ul>
						<li>templates use <link href="xslt-pattern"/> to specify their applicability</li>
						<li>users may not specify a template for a node they select</li>
						<li>instead of an error, <link href="xslt-builtin"/> are used to handle this situation</li>
					</ul>
				</ul>
			</slide>
			<slide id="xslt-pattern">
				<title>Patterns</title>
				<ul>
					<li>Patterns are a subset of XPath</li>
					<ul>
						<li>they are used to specify to which nodes certain language constructs apply</li>
						<li>patterns specify a set of conditions on a node</li>
					</ul>
					<li>The specification is short, but hard to understand</li>
					<ul>
						<li><a href="http://www.w3.org/TR/xslt#patterns"><q>A node matches a pattern if the node is a member of the result of evaluating the pattern as an expression with respect to some possible context; the possible contexts are those whose context node is the node being matched or one of its ancestors.</q></a></li>
					</ul>
					<li>Practically, patterns are node tests, node contexts, and predicates</li>
					<ul>
						<li><q><code>*</code></q> matches any element</li>
						<li><q><code>tr</code></q> matches <elem>tr</elem> elements</li>
						<li><q><code>thead/tr</code></q> matches <elem>tr</elem> elements within <elem>thead</elem> elements</li>
						<li><q><code>p[@class='warning']</code></q> matches <elem>p</elem> elements with their <xml>class</xml> set to <code>warning</code></li>
						<li>these mechanisms can be combined (and connected by the union operator <q><code>|</code></q>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Pattern-Based Processing</title>
				<listing src="people-patterns.xsl" line="10-29"/>
			</slide>
		</part>
		<part id="xslt-builtin">
			<title>Built-In Templates</title>
			<slide>
				<title>XSLT Default Behavior</title>
				<ul>
					<li>Built into every XSLT processor</li>
					<ul>
						<li>covering all seven XPath node types</li>
						<li>the XSLT processor always finds a template to process a node</li>
					</ul>
					<li>Conflicts are thus also built into the language</li>
					<ul>
						<li>every user template is in conflict with a built-in template</li>
						<li><link href="xslt-conflictresolution"/> is a core concept of XSLT</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Root and Elements</title>
				<ul>
					<li>The most important node types</li>
					<ul>
						<li>every XML document has a root and at least one element</li>
					</ul>
					<li>The default behavior traverses the tree recursively</li>
					<ul>
						<li>the recursion only selects child nodes (the default is <xml>select="node()"</xml>)</li>
						<li>attributes are <u>not</u> children of the element nodes!</li>
					</ul>
				</ul>
				<listing src="built-in.xsl" line="4-6"/>
				<listing src="first.xsl"/>
			</slide>
			<slide>
				<title>Text and Attributes</title>
				<ul>
					<li>These nodes create text output</li>
					<li>The processing does not continue with <xslte>apply-templates</xslte></li>
					<ul>
						<li>text and attribute nodes are always leaf nodes</li>
					</ul>
					<li>Attributes are not selected by the built-in rules</li>
					<ul>
						<li>they are only processed when selected by a user instruction</li>
					</ul>
				</ul>
				<listing src="built-in.xsl" line="12-14"/>
			</slide>
			<slide>
				<title>Processing Instructions and Comments</title>
				<ul>
					<li>These nodes are ignored</li>
					<li>Processing instructions and comments are selected by the built-in rules</li>
					<ul>
						<li>the built-in behavior can be overwritten if required</li>
					</ul>
				</ul>
				<listing src="built-in.xsl" line="16-16"/>
			</slide>
		</part>
		<part id="xslt-conflictresolution">
			<title>Conflict Resolution</title>
			<slide>
				<title>Template Selection</title>
				<ul>
					<li>XSLT processes <link href="xslt-algorithm">nodes on the source node list</link></li>
					<li>For processing each node, the <q>best</q> template must be found</li>
					<li>XSLT supports incremental development</li>
					<ul>
						<li>templates can be added for more specialized processing</li>
						<li>other code does not have to be changed at all</li>
						<li>the source node list provides support for this decoupling</li>
					</ul>
					<li>For simple cases, the default mechanism is sufficient</li>
					<ul>
						<li>advanced XSLT programming sometimes requires manual intervention</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Template Selection</title>
				<ol>
					<li>All templates with a <xslta>match</xslta> attribute</li>
					<ul>
						<li>this excludes <link href="xslt-named-templates"/></li>
					</ul>
					<li>All templates with the same <em>mode</em></li>
					<ul>
						<li>part of the <xslte>apply-templates</xslte> instruction selecting the node</li>
					</ul>
					<li>The <link href="xslt-pattern">Pattern</link> must match</li>
					<li>If more than one template matches, order by <em>import precedence</em></li>
					<ul>
						<li>the import tree of the stylesheet is considered (this includes the built-in rules)</li>
					</ul>
					<li>If more than one template matches, order by <em>priority</em></li>
					<ul>
						<li>this sorts rules according to the specificity</li>
					</ul>
					<li>Execute resulting rule</li>
					<ul>
						<li>if still more than one, signal error or execute last in stylesheet</li>
					</ul>
				</ol>
			</slide>
			<slide>
				<title>Import Precedence</title>
				<img style="margin : 4% ; width : 90% ; " src="xslt-import-precedence.png"/>
			</slide>
			<slide>
				<title>Priorities</title>
				<ul>
					<li>Template priorities are computed</li>
					<ul>
						<li>a very simple pattern-based process</li>
						<li>a higher value means it is a better match</li>
					</ul>
					<li>Five steps are used to compute the priority</li>
					<ol>
						<li>templates using the union operator are treated as if there were multiple templates</li>
						<li>QNames and processing instructions are assigned a priority of <code>0</code></li>
						<li>namespace-prefixed wildcards (<code>prefix:*</code>) are assigned a priority of <code>-0.25</code></li>
						<li>other node tests with axis specifiers are assigned a priority of <code>-0.5</code></li>
						<li>all other patterns are assigned a priority of <code>0.5</code></li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Different Conflicts</title>
				<listing src="conflict-resolution.xsl"/>
				<listing src="conflict-resolution.xml"/>
			</slide>
			<slide>
				<title>Resolution Process</title>
				<table style="margin : 4% ; width : 90% ; " rules="groups">
					<colgroup span="1"/>
					<colgroup span="1"/>
					<colgroup span="5"/>
					<colgroup span="1"/>
					<thead>
						<tr>
							<th valign="bottom" rowspan="2">Pattern</th>
							<th valign="bottom" rowspan="2">Priority</th>
							<th colspan="5">Resolution Step</th>
							<th valign="bottom" rowspan="2">Manual<br/>Adjustment</th>
						</tr>
						<tr>
							<th>1</th>
							<th>2</th>
							<th>3</th>
							<th>4</th>
							<th>5</th>
						</tr>
					</thead>
					<tbody>
						<tr>
							<td align="right">Built-in: <q><code>text() | @*</code></q></td>
							<td/>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"></td>
							<td align="center"></td>
							<td align="center"></td>
							<td align="center"></td>
						</tr>
						<tr>
							<td align="right">Built-in: <q><code>* | /</code></q></td>
							<td/>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"></td>
							<td align="center"></td>
							<td align="center"></td>
						</tr>
						<tr>
							<td align="right"><q><code>*</code></q></td>
							<td align="center">-0.5</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"></td>
							<td align="center"></td>
						</tr>
						<tr>
							<td align="right"><q><code>a</code></q></td>
							<td align="center">0.0</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"></td>
							<td align="center"></td>
						</tr>
						<tr>
							<td align="right"><q><code>b/a</code></q></td>
							<td align="center">0.5</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"></td>
						</tr>
						<tr>
							<td align="right"><q><code>c/b/a</code></q></td>
							<td align="center">0.5</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center">✓</td>
							<td align="center"><code>priority="1"</code></td>
						</tr>
					</tbody>
				</table>
			</slide>
			<slide>
				<title>Adjusting Priorities</title>
				<ul>
					<li>Computed priorities always lie between <code>-0.5</code> and <code>0.5</code></li>
					<li>Non-trivial patterns almost always have the priority <code>0.5</code></li>
					<li>Priorities can be set explicitly</li>
					<ul>
						<li><xslte>template match="…" priority="1"</xslte></li>
					</ul>
					<li>Managing priority values is up to the programmer</li>
					<ul>
						<li>it is rarely necessary to manage a large set of competing priorities</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Document-Driven Transformations</title>
				<ul>
					<li>XSLT often requires <em>document-driven</em> programming</li>
					<li>Imperative programmers are more used to controlling the program flow</li>
					<li>Document-driven processing is a powerful design principle</li>
					<li>Complex (highly variable) documents are much better handled by document-driven processing</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt-3">
		<title short="XSLT 3">XML Transformations (XSLT) – Part III</title>
		<date>2007-10-02</date>
		<toc class="resources"><a href="http://www-128.ibm.com/developerworks/xml/library/x-tipxsltrun/">XSLT Parameters</a></toc>
		<toc class="abstract">XSLT's template matching mechanism lets the XSLT processor find the <q>best match</q> to process a selected node. XSLT also supports a more traditional way of using templates, where they are called in a way very similar to function calls in most programming languages. Another interesting area of XSLT is variables and parameters, which are used for storing or passing values within XSLT code. One special property of XSLT variables is that they cannot be changed, which is a result of the functional design of the language.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XSLT Core Concepts</title>
			<ul>
				<li>XSLT can be used for very simple matching tasks</li>
				<ul>
					<li>a mostly static result tree can be produced</li>
					<li>XPaths can be used to fill in parts of the result tree</li>
					<li><link href="xslt-iterations"/> and <link href="xslt-conditionals"/> provide some flexibility for processing</li>
				</ul>
				<li>More complex transformations require a different approach</li>
				<ul>
					<li>instead of static structures, nodes are individually mapped to small structures</li>
					<li>these structure fragments together produce the result tree</li>
					<li>the process is <em>document-driven</em> and based on the <link href="xslt-processing-model"/></li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>How to Iterate</title>
			<slide>
				<title>Processing Nodes in XSLT</title>
				<ul>
					<li>XSLT supports two ways of processing nodes</li>
					<ul>
						<li><link href="xslt-iterations"/> loop over a set of selected nodes</li>
						<li><link href="xslt-templates"/> process nodes which have been put on the source node list</li>
					</ul>
					<li>Both mechanisms handle similar situations</li>
					<ul>
						<li>a set of nodes is selected and should be processed</li>
						<li>the code for processing has to be available in a code block</li>
						<li><link href="xslt-iterations"/> put this code in the <xslte>for-each</xslte> body</li>
						<li><link href="xslt-templates"/> put this code in a reusable building block</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Homogeneous Processing</title>
				<ul>
					<li><link href="xslt-iterations"/> may lead to less modular code</li>
					<li>If the code has to be reused, they may not be a good solution</li>
					<ul>
						<li><link href="xslt-named-templates"/> may provide some support for reuse</li>
					</ul>
					<li>The selected nodes should require similar processing</li>
					<ul>
						<li>otherwise, the iteration code will contain many conditional statements</li>
					</ul>
					<li>Iterations should be restricted to small units of code</li>
				</ul>
			</slide>
			<slide>
				<title>Heterogeneous Processing</title>
				<ul>
					<li>If the node processing is very different, templates are better</li>
					<ul>
						<li>different templates are written for all nodes being selected</li>
						<li>no conditional code has to be written, selection is done by matching nodes to template patterns</li>
					</ul>
					<li>Templates can be reused</li>
					<ul>
						<li>the nodes appear in different locations and should be processed consistently</li>
						<li>the matching mechanism provides the ideal support for this scenario</li>
					</ul>
					<li>Extensible code should always use templates</li>
					<ul>
						<li>other stylesheets can import an existing stylesheet</li>
						<li>by selectively <q>overwriting</q> templates, the behavior can be customized</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Calling Templates</title>
			<slide>
				<title>Executing Templates</title>
				<ul>
					<li><link href="xslt-templates"/> usually have a <xslta>match</xslta> attribute</li>
					<ul>
						<li>these templates are part of XSLT's special pattern matching processing</li>
					</ul>
					<li>Templates may also be named units of code</li>
					<ul>
						<li>there is nothing special about these templates</li>
						<li>they are being called using a name like regular procedures</li>
					</ul>
				</ul>
			</slide>
			<slide id="xslt-named-templates">
				<title>Named Templates</title>
				<ul>
					<li><xslte>template</xslte> may also carry a <xslta>name</xslta> attribute</li>
					<li><xslte>call-template</xslte> calls these templates by their <xslta>name</xslta></li>
					<li>Named templates have none of the special properties of XSLT template matching</li>
					<ul>
						<li>they are called by their name just like regular procedures</li>
						<li>they do not change the context of XPath evaluation</li>
					</ul>
					<li>Named templates are useful for modularizing code which is not tied to node types</li>
					<ul>
						<li>in most cases, they are called using <link href="xslt-parameters"/></li>
						<li>a typical application is the implementation of a facility for printing messages</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Variables and Parameters</title>
			<slide>
				<title>Programming Language Basics</title>
				<ul>
					<li>Variables in programming languages have different purposes</li>
					<ol>
						<li>defining a <em>name</em> for something so that it can be referred to</li>
						<li>associating this <em>name</em> with a <em>value</em> so that the value can be used</li>
						<li>providing a way to <em>update</em> the variable so that its value changes</li>
					</ol>
					<li>Variables in functional languages cannot change</li>
					<ul>
						<li>they are <em>immutable</em> (often called <em>constants</em> in other languages)</li>
						<li>more specifically, they are <em>dynamic constants</em> (i.e., can be computed at runtime)</li>
						<li>they are defined by giving them a <xpath>name</xpath> and referred to by <xpath>$name</xpath></li>
					</ul>
					<li>Variables in XSLT have no type (no static type checking possible)</li>
					<ul>
						<li>the value that they have is typed</li>
						<li>but a variable may have values of any type</li>
					</ul>
				</ul>
				<pre><![CDATA[<xsl:variable name="sum" select="$op1 + $op2"/>
<xsl:variable name="result" select="$sum * $factor"/>]]></pre>
			</slide>
			<part id="xslt-variables">
				<title>Variables</title>
				<slide>
					<title>Why Variables?</title>
					<ul>
						<li>Reuse of values in different locations</li>
						<ul>
							<li>texts required for the transformation</li>
							<li>facilitates better separation of structure and content</li>
						</ul>
						<pre><![CDATA[<xsl:value-of select="$email-prefix"/> <!-- $email-prefix = 'You have ' -->
<xsl:value-of select="count(//message)"/>
<xsl:value-of select="$email-suffix"/> <!-- $email-suffix = ' e-mail messages.' -->]]></pre>
						<li>Using the correct context is essential</li>
						<ul>
							<li>variables cannot be updated</li>
							<li>if they need to be <q>updated</q>, they have to be re-created</li>
						</ul>
						<li>Why are they called <q>variables</q> if they are constants?</li>
						<ul>
							<li>their value varies in different invocations of the context</li>
							<li>they are computed at runtime (dynamic constants) rather than statically</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Scope and Extent</title>
					<ul>
						<li>Variables can be global or local</li>
						<ul>
							<li>global variables are visible in all templates</li>
							<li>local variables are visible in their context (i.e., at <xpath>following-sibling::*/descendant-or-self::*</xpath>)</li>
							<li>local variables are allowed to <em>shadow</em> global (not local) variables</li>
						</ul>
						<li>Variable values may be assigned using the <xslta>select</xslta> attribute</li>
						<ul>
							<li>The XPath's result is the value of the variable</li>
						</ul>
						<li>Variables can contain arbitrary XSLT code</li>
						<ul>
							<li>the code is executed in the same way as when constructing the result tree </li>
							<li>the <em>result tree fragment</em> is the value of the variable</li>
							<li>it can be used as a string (<xslte>value-of</xslte>) or as a tree (<xslte>copy-of</xslte>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Using Variables</title>
					<listing src="variable-assignment-wrong.xsl" line="4-15"/>
					<listing src="variable-assignment.xsl" line="4-17"/>
				</slide>
			</part>
			<part id="xslt-parameters">
				<title>Parameters</title>
				<slide>
					<title>Parameters vs. Variables</title>
					<ul>
						<li>Parameters are variables with additional semantics</li>
						<ul>
							<li>they are passed to their scope from the outside</li>
							<li>they are available within the scope like a variable (scopes are stylesheets and templates)</li>
							<li>like variables, they cannot be updated (and only global parameters can be shadowed)</li>
						</ul>
						<li>XSLT does not check proper parameter passing</li>
						<ul>
							<li>if a declared parameter is not passed, it gets a default value (specified or <xpath>''</xpath>)</li>
							<li>if a passed parameter is not declared, it is ignored</li>
							<li>like variables, parameters have no type (any value can be passed)</li>
							<li>XSLT's robustness makes it hard to spot programming errors</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Stylesheet Parameters</title>
					<ul>
						<li>Passed to the stylesheet when calling the stylesheet</li>
						<ul>
							<li>the exact way of specifying the parameters depends on the processor and the environment</li>
							<li>the passed values are available in the same way as global variables</li>
							<li>parameter checking has to be done by hand</li>
						</ul>
					</ul>
					<listing src="parameter-test.xsl" line="4-13"/>
				</slide>
				<slide>
					<title>Template Parameters</title>
					<ul>
						<li>Parameters can be passed to templates</li>
						<ul>
							<li>works with <xslte>apply-templates</xslte> and <xslte>call-template</xslte></li>
							<li><xslt>with-param</xslt> elements list the passed parameters</li>
							<li>parameter matching is done by name (there is no particular order to parameters)</li>
						</ul>
						<li>Templates can be programmed as parametrized components</li>
						<ul>
							<li>checking the signature has to be done by hand</li>
							<li><xslt>with-param</xslt> elements list the passed parameters</li>
							<li>parameter matching is done by name (there is no particular order to parameters)</li>
						</ul>
						<li>Parametrized template calls need a lot of markup</li>
						<ul>
							<li>XSLT's XML syntax makes the code hard to read</li>
						</ul>
					</ul>
					<pre>main param start = 1 ; param count = 10 ; {
	loop (0) };
loop param counter ; {
	print $start + $counter ;
	if ( $counter &lt; $count - 1) then 
		loop ($counter + 1) ; }</pre>
				</slide>
				<slide>
					<title>Parameter Passing</title>
					<listing src="parameters.xsl" line="4-21"/>
				</slide>
				<slide>
					<title>Message Facility</title>
					<listing src="message.xsl"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSLT Programming Environment</title>
				<ul>
					<li>Learning to use the right patterns takes some time</li>
					<li>Templates can be <em>applied</em> and <em>called</em></li>
					<li>XSLT variables have some unusual properties</li>
					<li>Parameters can be used for stylesheets and templates</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt-4">
		<title short="XSLT 4">XML Transformations (XSLT) – Part IV</title>
		<date>2007-10-04</date>
		<toc class="abstract">Advanced XSLT processing includes better control of the input and output documents, which can be finely controlled in terms of how whitespace is treated. Another interesting feature of XSLT is <em>keys</em>, which allow shorthand notations for frequently used access paths to nodes, and provide XSLT processors with more information for performance optimizations. Instructions for creating all possible kinds of nodes in the output tree make it possible to write code which generates element or attribute names based on runtime evaluations.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Controlling Documents</title>
			<slide>
				<title>XSLT Processing Model</title>
				<ul>
					<li>XSLT was built as a client-side language</li>
					<ul>
						<li>the browser has an XML document</li>
						<li>the XSLT is used to transform this XML</li>
						<li>the result is used for rendering the formatted document</li>
					</ul>
					<li>XSLT provides facilities for accessing additional documents</li>
					<ul>
						<li>an additional XML might contain localized texts for rendering</li>
						<li>like everything in XSLT, identification uses URIs</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Input Documents</title>
				<slide>
					<title>Opening Documents</title>
					<ul>
						<li>Initially, XSLT starts with the XPath node tree of the main document</li>
						<ul>
							<li>this step is outside of the control of the XSLT programmer</li>
						</ul>
						<li>Additional documents can be accessed using <xpath>document()</xpath></li>
						<ul>
							<li>the function accepts URIs, which are interpreted relative to the stylesheet</li>
							<li>only XML documents can be used, they will be parsed into an XPath tree</li>
						</ul>
						<li>XSLT Processors are smart enough to cache documents</li>
						<ul>
							<li>re-opening the same document will not re-parse it</li>
						</ul>
					</ul>
					<listing src="document.xsl" />
				</slide>
				<slide id="input-whitespace">
					<title>Whitespace in Documents</title>
					<ul>
						<li>Documents often contain many irrelevant whitespace text nodes</li>
						<ul>
							<li>many XML documents are pretty-printed for readability</li>
							<li>pretty-printing produces many line-feeds and tabs/spaces</li>
						</ul>
						<li>XSLT can be instructed to ignore whitespace nodes</li>
						<ul>
							<li><xslte>strip-space</xslte> lists all elements for which whitespace children should be ignored</li>
							<li>this may be a bit too much, because <link href="mixed-content"/> may contain significant whitespace</li>
						</ul>
						<pre><![CDATA[<p>do <u>not</u> <em>throw</em> <b>away</b> these whitespace nodes!</p>]]></pre>
						<li>XSLT can be instructed to preserve some whitespace nodes</li>
						<ul>
							<li><xslte>preserve-space</xslte> lists all elements for which whitespace children should be preserved</li>
							<li>usually, <xslte>preserve-space</xslte> lists the exceptions for <xslte>strip-space</xslte></li>
							<li>usually, <xslte>preserve-space</xslte> contains a list of all mixed content elements</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Controlling Whitespace</title>
					<listing src="strip-preserve.xsl" line="4-12"/>
				</slide>
			</part>
			<part>
				<title>Output Documents</title>
				<slide>
					<title>Serialization</title>
					<ul>
						<li>XSLT always produces a result tree</li>
						<ul>
							<li>stylesheet processing starts with an empty tree (root node only)</li>
							<li>XSLT code producing output then adds nodes to this tree</li>
							<li><xslte>text</xslte>, <xslte>value-of</xslte>, <xslte>copy-of</xslte>, <xslte>copy</xslte>, <xslte>element</xslte>, <xslte>attribute</xslte>, <xslte>comment</xslte>, <xslte>processing-instruction</xslte>, <link href="xslt-literal"/></li>
						</ul>
						<li>Serialization is the process of externalizing the final tree</li>
						<ul>
							<li><xslte>output</xslte> controls how the tree is serialized</li>
							<li><xml>xml</xml> writes the tree as an XML document</li>
							<li><xml>html</xml> writes the tree as an HTML document (<elem>img …</elem> instead of <elem>img …/</elem>)</li>
							<li><xml>text</xml> writes the tree's <em>string value</em> (the concatenation of all text nodes)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Multiple Output Documents</title>
					<ul>
						<li>XSLT 1.0 does not support more than one output document</li>
						<ul>
							<li><xslte>message</xslte> is another output channel, but not a document</li>
							<li>this was one of the most requested features for language improvements</li>
						</ul>
						<li>How can stylesheets produce more than one document?</li>
						<ul>
							<li>XSLT 1.0 may produce one document which is then post-processed</li>
							<li>XSLT 2.0 offers language facilities for more than one output document</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xslt-keys">
			<title>Keys</title>
			<slide>
				<title>Document Access</title>
				<ul>
					<li>Some parts of documents may be accessed frequently</li>
					<ul>
						<li><xpath>//person[@ss = $ss]/name/surname</xpath> for getting a name by social security number</li>
						<li>costs depend on document size and access frequency</li>
						<li>the document structure has to be used in all places where the name is used</li>
					</ul>
					<li>Keys provide access to frequently used nodes</li>
					<ul>
						<li><xpath>key('ssKey', $ss)/name/surname</xpath> is based on a predefined access path (the key)</li>
						<li>very easy to optimize even for very simple XSLT processors</li>
						<li>easier to understand from the programmer's point of view</li>
					</ul>
					<li>For nested predicates, non-optimized evaluation is very expensive</li>
					<ul>
						<li><xpath>//reference[@crossref = //reference[@title = $title]/@name]</xpath></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Declaring and Using Keys</title>
				<ul>
					<li>
						<xslte>key</xslte> defines a key on the stylesheet's top level</li>
					<ul>
						<li><xslte>key name="ssKey" match="person" use="@ss"/</xslte></li>
						<li><xslta>name</xslta> is used for referring to the key (most people use <q><xml>…Key</xml></q>)</li>
						<li><xslta>match</xslta> selects all nodes which will be part of the key (i.e., accessible through it)</li>
						<li><xslta>use</xslta> selects the value(s) which will retrieve the nodes</li>
					</ul>
					<li>
						<xpath>key()</xpath> is used for retrieving nodes from a key</li>
					<ul>
						<li>the first argument specifies the name of the key (defined by <xslte>key name="…" …</xslte>)</li>
						<li>the second argument specifies the value for which to look in that key</li>
						<li><xpath>key()</xpath> returns a node set (empty or any number of nodes)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML and XSLT for using a Key</title>
				<listing src="people.xml" line="2-12"/>
				<listing src="peoplekeys.xsl" line="4-6"/>
				<ul>
					<li>
						<xpath>key('givenNameKey', 'Thomas')</xpath> ≡ <xpath>//name[given = 'Thomas']</xpath>
					</li>
				</ul>
			</slide>
			<slide>
				<title>XSLT Key Structure</title>
				<table width="90%" cellpadding="10">
					<tr>
						<td>
							<table border="1" cellpadding="10">
								<tr>
									<th colspan="2">
										<xslt>givenNameKey</xslt>
									</th>
								</tr>
								<tr>
									<th>Node</th>
									<th>Value</th>
								</tr>
								<tr>
									<td>[1] Erik Thomas Wilde</td>
									<td>Erik</td>
								</tr>
								<tr>
									<td>[1] Erik Thomas Wilde</td>
									<td>Thomas</td>
								</tr>
								<tr>
									<td>[2] Thomas Plagemann</td>
									<td>Thomas</td>
								</tr>
								<tr>
									<td>[3] Bob Glushko</td>
									<td>Bob</td>
								</tr>
							</table>
						</td>
						<td>
							<table border="1" cellpadding="10">
								<tr>
									<th colspan="2">
										<xslt>countryKey</xslt>
									</th>
								</tr>
								<tr>
									<th>Node</th>
									<th>Value</th>
								</tr>
								<tr>
									<td>[1a] Erik Thomas Wilde</td>
									<td>de</td>
								</tr>
								<tr>
									<td>[1b] iSchool/UCB</td>
									<td>us</td>
								</tr>
								<tr>
									<td>[2a] Thomas Plagemann</td>
									<td>de</td>
								</tr>
								<tr>
									<td>[2b] IFI/UIO</td>
									<td>no</td>
								</tr>
								<tr>
									<td>[3a] Bob Glushko</td>
									<td>us</td>
								</tr>
								<tr>
									<td>[3b] iSchool/UCB</td>
									<td>us</td>
								</tr>
							</table>						
						</td>
					</tr>
				</table>
				<ul>
					<li><code>key('givenNameKey', 'Thomas')</code> → 2 nodes (1, 2)</li>
					<li><code>key('countryKey', 'us')</code> → 3 nodes (1b, 3a, 3b)</li>
					<li><code>key('countryKey', 'us')/self::entry</code> → 1 node (3a)</li>
				</ul>
			</slide>
			<slide>
				<title>Using Keys</title>
				<ul>
					<li>Finding nodes by intersecting <xpath>key()</xpath> results</li>
					<ul>
						<li><xpath>key()</xpath> always returns node sets</li>
						<li>interesting sets of nodes may be the intersection of several keys</li>
						<li>unfortunately, XPath does not provide an operator for set intersection</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Node Set Intersection</title>
				<p><xpath>$a[count(. | $b) = count($b)]</xpath>: Find all nodes in <code>$a</code> where the cardinality of <code>$b</code> does not change when adding this node to it. This means the node must be in <code>$b</code>, and it is in <code>$a</code> to start with.</p>
				<img src="xpath-intersection.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
		</part>
		<part>
			<title>Generating Result Nodes</title>
			<slide id="xslt-literal">
				<title>Literal Result Elements</title>
				<ul>
					<li>Non-XSLT elements are copied to the result tree</li>
					<ul>
						<li>this is the most common way of producing nodes</li>
						<li>in this case, the nodes' names are hard-coded in the stylesheet</li>
					</ul>
					<li>Attributes are also copied to the result tree</li>
					<ul>
						<li>this means the attribute will always be there</li>
						<li>conditional creation of attributes needs other language constructs</li>
					</ul>
				</ul>
			</slide>
			<slide id="xslt-element">
				<title>Producing Nodes Explicitly</title>
				<ul>
					<li>Element nodes can be produced by using <xslt>element</xslt></li>
					<ul>
						<li>the element <xslta>name</xslta> must be specified and can be computed</li>
						<li>additional instructions exist for all node types</li>
					</ul>
				</ul>
				<listing src="uppercaser.xsl" line="3-12"/>
			</slide>
		</part>
		<part>
			<title>Modularizing Stylesheets</title>
			<slide>
				<title>Including and Importing</title>
				<ul>
					<li>XSLT supports two ways of modularizing code</li>
					<ul>
						<li>including simply distributes code across multiple files</li>
						<li>importing creates a dependency and a hierarchy</li>
					</ul>
					<li><xslte>include</xslte> is mainly used for keeping files manageable</li>
					<ul>
						<li>it is used within managed projects</li>
					</ul>
					<li><xslte>import</xslte> is mainly used for reusing code from elsewhere</li>
					<ul>
						<li>it imports reused code and assigns this code a lower precedence</li>
						<li>local instructions can then overwrite (if required) some of the imported code</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Import Precedence</title>
				<img style="margin : 4% ; width : 90% ; " src="xslt-import-precedence.png"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSLT in Practice</title>
				<ul>
					<li>XSLT is a simple programming language</li>
					<li>The processing model needs some time to get used to</li>
					<li>Sometimes the language is really too simple</li>
					<li>If you are really interested in XSLT, learn XSLT 2.0!</li>
					<ul>
						<li>this will also take you 80% along the way to learning XQuery</li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
    <presentation id="xpath20">
        <title short="XPath 2.0">XML Path Language (XPath) 2.0</title>
        <date>2007-10-09</date>
        <toc class="resources"><a href="http://www.w3.org/TR/xpath20" title="W3C XPath 2.0 Spec">Spec</a></toc>
        <toc class="abstract">The <em>XML Path Language (XPath)</em> is one of the most useful and frequently used languages in the area of XML technologies. In its version 1.0, it is used in technologies such as XSLT, XSDL, DOM, and XML Tools. With <em>XPath 2.0</em>, the language has been greatly extended, the new version of XPath is the foundation for XSLT 2.0 and XQuery. XPath 2.0 provides support for regular expression matching, typed expressions, and contains language constructs for conditional and repeated evaluation.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Why XPath?</title>
			<slide>
				<title>Selecting Parts of XML Documents</title>
				<ul>
					<li>XML is a syntax for trees</li>
					<ul>
						<li>it defines a way for how trees can be exchanged</li>
					</ul>
					<li>XML technologies should provide support for working with trees</li>
					<ul>
						<li>when receiving trees, access to the tree should be easy (DOM)</li>
						<li>validating trees should be easy (<link href="xsdl-1">XSDL</link>)</li>
						<li>mapping trees should be easy (<link href="xslt-1">XSLT</link>)</li>
						<li>querying tree collections should be easy (<link href="xquery-1">XQuery</link>)</li>
						<li>XPath is for trees what regular expressions are for text-based information</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Making Selection Reusable</title>
				<ul>
					<li>Different XML technologies need selection</li>
					<ul>
						<li><link href="xslt-1">XSLT</link> needs it for selecting parts and manipulating them</li>
						<li><link href="xsdl-1">XSDL</link> needs it for applying identity constraints</li>
						<li>DOM needs it for extracting parts from an XML tree</li>
						<li>XQuery needs it for writing XML-oriented queries</li>
					</ul>
					<li>XPath was created to be reusable</li>
					<ul>
						<li>XML experts should only learn one selection language</li>
						<li>this knowledge can be reused when learning new technologies</li>
						<li>implementations can reuse code libraries</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>How XPath Evolved</title>
				<ul>
					<li>XSL was designed as the new XML stylesheet language</li>
					<ol>
						<li><em>XSL Transformations (XSLT)</em> transform the input document</li>
						<li><em>XSL Formatting Objects (XSL-FO)</em> is what they will transform it to</li>
					</ol>
					<li>XSLT was designed to work on arbitrary XML input documents</li>
					<ul>
						<li>started as a part of XSL (<a href="http://www.w3.org/TR/1998/WD-xsl-19981216">WD-xsl-19981216</a> → <a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a>)</li>
						<li>for selecting parts of the transformation input, a selection mechanism had to be provided</li>
					</ul>
					<li>XPath was turned into a standalone specification</li>
					<ul>
						<li>started as a part of XSLT (<a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a> → <a href="http://www.w3.org/1999/07/WD-xslt-19990709">WD-xslt-19990709</a>)</li>
						<li>reused in a number of other W3C specifications (XSDL, DOM)</li>
					</ul>
					<li>Complete overhaul for XSLT 2.0 and XQuery</li>
					<ul>
						<li><a href="http://www.w3.org/TR/xpath20/">XPath 2.0</a> as the core language</li>
						<li>a much larger set of <a href="http://www.w3.org/TR/xpath-functions/">functions and operators</a></li>
						<li>the underlying <a href="http://www.w3.org/TR/xpath-datamodel/">data model</a> which describes the foundation</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>How XPath Works</title>
			<part>
				<title>The XPath Tree Model</title>
				<slide>
					<title>Starting from the Infoset</title>
					<ul>
						<li>XPath operates on an <link href="xpath-tree">abstract data model</link></li>
						<ul>
							<li>a tree derived from the <link href="infoset"/></li>
							<li>a simplification (another one!) of the underlying XML</li>
						</ul>
						<li>The Infoset is turned into an <em>XPath node tree</em></li>
						<ul>
							<li>11 infoset item types → 7 XPath node tree node types</li>
							<li>character items are merged into text nodes</li>
							<li>namespace declarations are no longer visible as attributes</li>
						</ul>
					</ul>
				</slide>
				<slide id="not-xpath">
					<title>What is <u>Not</u> in the XPath Tree</title>
					<ul>
						<li>The same things which are <link href="not-infoset">not in the Infoset</link></li>
						<ul>
							<li>the order of attributes in a start tag</li>
							<li>the types of quotes around attribute values</li>
							<li>character references and entities (<code>&amp;#xFC;</code>/<code>&amp;uuml;</code> → <code>ü</code>)</li>
						</ul>
						<li>And some more …</li>
						<ul>
							<li>namespace declarations are no longer visible as attributes</li>
							<li>notations and unexpanded entity references</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XPath Evaluation</title>
				<slide>
					<title>Tree In / Selection Out</title>
					<ul>
						<li>XPath evaluates an expression based on a tree</li>
						<li>Where the tree comes from is out of XPath's scope</li>
						<li>The result of the evaluation is a selection</li>
						<ul>
							<li><code>//img[not(@alt)]</code> → select all images which have no <code>alt</code> attribute</li>
							<li><code>count(//img)</code> → return the number of images</li>
							<li><code>/descendant::img[3]/@src</code> → return the third image's <code>src</code> URI</li>
							<li><code>starts-with(/html/@lang, 'en')</code> → test whether the document's language is English</li>
						</ul>
						<li>Syntax errors may occur</li>
						</ul>
				</slide>
			</part>
		</part>
		<part>
			<title short="XPath 1.0">XPath 1.0 Revisited</title>
			<slide>
				<title>Source Document</title>
				<listing src="xlinked-class.xml" line="81-98" title="Web-Based Publishing Class – Spring 2007"/>
			</slide>
			<slide>
				<title>XPath Expressions</title>
				<ul>
					<li>XPaths can be location paths</li>
					<pre>//ul/li</pre>
					<li>XPaths can use functions</li>
					<pre>id('dret')</pre>
					<li>XPaths can be expressions yielding atomic values</li>
					<pre>substring-before(id('dret'), ' ')</pre>
					<li>XPaths can combine all of the above</li>
					<pre>count(//ul/li[starts-with(substring-after(., ' '), 'W')])</pre>
				</ul>
			</slide>
			<slide>
				<title>Axes</title>
				<img style="width : 86% ; margin : 4% ; " src="xpath-axes.png"/>
			</slide>
		</part>
		<part>
			<title>Ease of Use</title>
			<slide>
				<title>Easier to Understand</title>
				<ul>
					<li>XPath 2.0 provides better ways to write XPaths</li>
					<ul>
						<li>some constructs allow better ways of writing XPaths</li>
						<li>some constructs allow things previously impossible in XPath</li>
					</ul>
					<li>XPath usually is embedded in another language (XQuery, XSLT)</li>
					<ul>
						<li>even in XSLT 1.0, there was always a trade-off between XPath and XSLT</li>
						<li>with XPath 2.0, even more powerful XPaths can be implemented</li>
					</ul>
					<li>Finding a good balance between XPath and the host language is an art</li>
					<ul>
						<li>very complex XPaths can become almost undecipherable</li>
						<li>there is no final answer, coding styles vary based on language preference</li>
					</ul>
				</ul>
				<pre>&lt;listing src="xlinked-class.xml" line="81-98"/></pre>
				<pre>string-join(tokenize( if ( exists(@encoding) ) then unparsed-text($fileuri, @encoding) else unparsed-text($fileuri), '\r?\n')[(position() ge number(tokenize(current()/@line, '\-')[1])) and (position() le number(tokenize(current()/@line, '\-')[2]))], '&amp;#xa;')</pre>
			</slide>
			<part id="xpath20-conditional">
				<title>Conditional Expressions</title>
				<slide>
					<title>Control Flow in XPath</title>
					<ul>
						<li><q>Control flow</q> in XPath 1.0 expressions is based on predicates</li>
						<ul>
							<li>the results of location path steps are filtered by predicates</li>
							<li>this can be used to <q>emulate</q> control flow</li>
							<li>this technique is limited because it can only be applied to nodes</li>
						</ul>
						<li>XPath 2.0 introduces conditional expressions</li>
						<ul>
							<li>a condition is given which is interpreted as a boolean</li>
							<li>based on the result, either the <xpath>then</xpath> or the <xpath>else</xpath> part is evaluated</li>
							<li>the else part may not be omitted</li>
						</ul>
					</ul>
					<pre>if ( … ) then … else …</pre>
					<pre>if ( @sex eq 'm' ) then 'Sir' else 'Madam'</pre>
					<pre>if ( @sex eq 'm' ) then 'Sir' else if ( @sex eq 'f' ) then 'Madam' else 'Whatever'</pre>
				</slide>
				<slide>
					<title>Less XSLT</title>
					<listing src="names.xml"/>
					<pre>first | last[not(../first)]</pre>
					<pre><![CDATA[<xsl:variable name="name">
	<xsl:choose>
		<xsl:when test="first">
			<xsl:value-of select="first"/>
		</xsl:when>
		<xsl:otherwise>
			<xsl:value-of select="last"/>
		</xsl:otherwise>
	</xsl:choose>
</xsl:variable>]]></pre>
					<pre>if ( exists(first) ) then first else last</pre>
				</slide>
			</part>
			<part id="xpath20-iterations">
				<title>Iterations</title>
				<slide>
					<title>Repeating Expression Evaluation</title>
					<ul>
						<li>Iteration repeatedly applies an expression to a sequence of items</li>
						<ul>
							<li>the notion of <link href="xpath20-sequences"/> is central to this concept</li>
							<li>this requires variables for binding and evaluation</li>
						</ul>
						<li>Iterations clearly demonstrate the change in expressiveness</li>
						<ul>
							<li>they introduce functionality which previously was limited to host languages</li>
						</ul>
					</ul>
					<pre>for $… in … return …</pre>
					<pre>for $i in //name return $i/last</pre>
					<pre>for $i in //name return if ( exists($i/first) ) then $i/first else $i/last</pre>
				</slide>
				<slide>
					<title>Iterations vs. Location Paths</title>
					<ul>
						<li>Every location path can be written using iterations</li>
						<pre>/names/name/last</pre>
						<pre>for $i in /names return for $j in $i/name return $j/last</pre>
						<li>Iterations are a more generalized way of evaluation</li>
						<ul>
							<li>path expressions work on nodes only</li>
							<pre>for $i in 1 to 10 return $i</pre>
							<li>path expressions sort by document order and eliminate duplicates</li>
							<pre>//last/../..</pre>
							<pre>for $i in //last return for $j in $i/.. return $j/..</pre>
							<li>location steps change the context, iterations use the variable for this purpose</li>
						</ul>
						<li>Location paths are a useful syntax and method for tree navigation</li>
					</ul>
				</slide>
			</part>
			<part id="xpath20-quantified">
				<title>Quantified Expressions</title>
				<slide>
					<title>Testing Sequences</title>
					<ul>
						<li>Testing whether some or all items of a sequence satisfy a condition</li>
						<ul>
							<li>the notion of <link href="xpath20-sequences"/> is central to this concept</li>
							<li>this requires variables for binding and evaluation</li>
						</ul>
						<li>Quantifiers are well-known from query languages</li>
						<ul>
							<li><xpath>some</xpath> iterates over items and succeeds after the first success</li>
							<li><xpath>every</xpath> iterates over items and fails after the first failure</li>
							<li>both constructs are good candidates for optimization</li>
						</ul>
					</ul>
					<pre>( some | every ) $… in … satisfies …</pre>
					<pre>some $i in //*[@xlink:type='locator']/@xlink:href satisfies $i eq $query-uri</pre>
					<pre>every $i in //li/@id satisfies //*[@xlink:type='locator'][@xlink:href=concat('#', $i)]</pre>
				</slide>
			</part>
		</part>
		<part id="xpath20-sequences">
			<title>Sequences</title>
			<slide>
				<title>Major Changes</title>
				<ul>
					<li>XPath 1.0 has a very simple data model</li>
					<ol>
						<li>node sets: <code>//img[not(@alt)]</code></li>
						<li>number: <code>count(//img)</code></li>
						<li>string: <code>/descendant::img[3]/@src</code></li>
						<li>boolean: <code>starts-with(/html/@lang, 'en')</code></li>
					</ol>
					<li>XPath 2.0 needs a more powerful model for its advanced functionality</li>
					<ul>
						<li>everything in XPath 2.0 is a sequence</li>
						<li>sequences can contain a mix of items of various types</li>
						<li>sequences cannot be nested (there are no sequences of sequences)</li>
					</ul>
				</ul>
				<pre>every $i in ( 11, 22, 33, 'string' ) satisfies string(number($i)) ne 'NaN'</pre>
			</slide>
			<slide>
				<title>Divide and Conquer</title>
				<ul>
					<li>Sequences are part of the <link href="xdm"/></li>
					<ul>
						<li>data models are separate entities from evaluation languages</li>
						<li>a data model can be reused in different evaluation languages</li>
					</ul>
					<li>XDM is far more complex than its predecessor, the Infoset</li>
					<ul>
						<li>XSDL datatypes have been integrated into the data model</li>
						<li>Sequences allow more complex structures to exist</li>
					</ul>
					<li>Understanding the data model is key to understanding the language</li>
					<ul>
						<li>for simple XPaths, the mental model of XPath 1.0 works</li>
						<li>more advanced XPaths can only be understood when understanding XDM</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Applications</title>
			<slide>
				<title>Standalone</title>
				<ul>
					<li>XPath can be used in standalone XML tools</li>
					<ul>
						<li>editors provide XPath evaluation as <q>regular expressions for XML</q></li>
						<li>text-based searches in bigger XML documents are not a good idea</li>
					</ul>
					<li>Standalone tools are good for learning XPaths</li>
					<ul>
						<li>many tools support interactive evaluation</li>
						<li>seeing sequences visualized often is very helpful</li>
					</ul>
				</ul>
				<pre>for $i in ( 11, 22, 33, 'string' ) return ($i, number($i))</pre>
			</slide>
			<slide>
				<title>XQuery</title>
				<ul>
					<li><link href="xquery-1"/> is built on top of XPath 2.0</li>
					<ul>
						<li>XPath allows constructing sequences based on documents</li>
						<li>XPath has no way of generating new document structures</li>
					</ul>
					<li>XQuery builds a query language around XPath</li>
					<ul>
						<li>the basic idea is to provide a language for constructing results from sequences</li>
						<li>~80% of the complexity of XQuery is in XPath 2.0</li>
					</ul>
				</ul>
				<pre ahref="http://www.stylusstudio.com/xquery_primer.html"><![CDATA[declare variable $firstName external;
<videos featuring="{$firstName}"> {
  let $doc := .
  for $v in $doc//video, $a in $doc//actors/actor
  where ends-with($a, $firstName) and $v/actorRef = $a/@id
  order by $v/year
  return
    <video year="{$v/year}"> { $v/title } </video> }
</videos>]]></pre>
			</slide>
			<slide>
				<title>XSLT 2.0</title>
				<ul>
					<li>XSLT 2.0 is based on <link href="xslt-1">XSLT 1.0</link> and built on top of XPath 2.0</li>
					<ul>
						<li>XPath allows constructing sequences based on documents</li>
						<li>XPath has no way of generating new document structures</li>
					</ul>
					<li>XSLT focuses on transformations rather than queries</li>
					<ul>
						<li><q>a query is a transformation is a query</q></li>
						<li>language preference is more a question of training and experience</li>
					</ul>
					<li>Many problems can be appropriately solved with both languages</li>
					<ul>
						<li>XQuery is favored by database people and by the big vendors</li>
						<li>XSLT 2.0 is favored by XML people who worked a lot with XSLT 1.0</li>
						<li>implementations could easily support both languages</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Easy Transition</title>
				<ul>
					<li>XPath 1.0 users can start using XPath 2.0 right away</li>
					<li>apart from a <a href="http://www.w3.org/TR/xpath20/#id-backwards-compatibility" title="XPath 2.0 Spec: Backwards Compatibility with XPath 1.0">few corner cases</a>, the results will be the same</li>
					<li>XPath 2.0 has a huge set of <a href="http://www.w3.org/TR/xpath-functions/">functions and operators</a></li>
					<li>XSDL types can be used, values can be cast</li>
					<li>Regular expressions are supported for working with strings</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="xdm">
        <title short="XDM">XQuery 1.0 and XPath 2.0 Data Model (XDM)</title>
        <date>2007-10-11</date>
        <toc class="resources"><a href="http://www.w3.org/TR/xpath-datamodel/" title="W3C XDM Spec">Spec</a></toc>
        <toc class="abstract">While XPath 2.0 syntactically is an extension of XPath 1.0, the underlying data model has changed quite radically. Instead of XPath 1.0's simple concept of four datatypes (node set, number, string, boolean), the <em>XQuery 1.0 and XPath 2.0 Data Model (XDM)</em> is based on <em>sequences</em> and allows much more sophisticated ways of data representation and manipulation. Furthermore, XDM includes the datatypes defined by XSDL, which results in a complex and powerful collection of built-in datatypes and operations on these datatypes.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="xdm-sequences">
			<title>Sets vs. Sequences</title>
			<slide>
				<title>XPath 1.0 Sets</title>
				<ul>
					<li>XPath 1.0 has a very simple data model of four types</li>
					<ol>
						<li>node sets: <code>//img[not(@alt)]</code></li>
						<li>number: <code>count(//img)</code></li>
						<li>string: <code>/descendant::img[3]/@src</code></li>
						<li>boolean: <code>starts-with(/html/@lang, 'en')</code></li>
					</ol>
					<li>When XPath 1.0 was created, the XML world was <em>untyped</em></li>
					<ul>
						<li>XML documents contain content in text nodes and attribute values</li>
						<li>XPath introduced its humble world of three datatypes</li>
					</ul>
					<li>Dealing with types in XSLT 1.0 is very unpleasant</li>
					<ul>
						<li>all datatypes beyond the basic types must be implemented by hand</li>
						<li>all operations on these types must be implemented as well</li>
						<li><a href="http://www.exslt.org/">EXSLT</a> collects modules for frequently used datatypes</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XPath 2.0 Sequences</title>
				<ul>
					<li>XSDL introduces the concept of <em>typed data</em> to the XML world</li>
					<ul>
						<li>one part of XSDL is its ability to <em>validate documents</em></li>
						<li>the other part of XSDL is the fact that validation produces <em>type annotations</em></li>
					</ul>
					<li><link href="xpath20-sequences"/> are XPath's mechanism where these types show up</li>
					<li>XPath 2.0 needs a more powerful model for its advanced functionality</li>
					<ul>
						<li>everything in XPath 2.0 is a sequence (of typed items)</li>
						<li>sequences can contain a mix of items of various types</li>
						<li>sequences cannot be nested (there are no sequences of sequences)</li>
					</ul>
					<li>Sequences replace <em>node sets</em>, which in XDM do not exist anymore</li>
					<ul>
						<li><q>Sequences replace node-sets from XPath 1.0. In XPath 1.0, node-sets do not contain duplicates. In generalizing node-sets to sequences in XPath 2.0, duplicate removal is provided by functions on node sequences.</q> (<a href="http://www.w3.org/TR/xpath-datamodel/#sequences" title="XDM Specification: Sequences">XDM</a>)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xpath20-comparisons">
			<title>Comparisons</title>
			<slide id="xpath20-comparisons-general">
				<title>General Comparisons</title>
				<pre>= != &lt; &lt;= > >=</pre>
				<ul>
					<li>XPath 1.0 only has these operators</li>
					<ul>
						<li>they are defined to work on any of the four datatypes</li>
						<li>node set comparisons are defined in a <a href="http://www.w3.org/TR/xpath#booleans">rather complex way</a></li>
						<li>in particular, XPath 1.0 comparisons often involve type casting</li>
					</ul>
					<li>XPath 2.0 introduces <link href="xpath20-comparisons-value"/> for comparing atomic values</li>
					<ul>
						<li>they are introduced to provide a set of operators with fewer surprises</li>
						<li>the original XPath 1.0 operators are redefined to work on sequences</li>
					</ul>
					<li>General comparisons can be expressed using <link href="xpath20-quantified"/></li>
					<ul>
						<li>potentially a large number of comparisons</li>
						<pre>$X = $Y</pre>
						<pre>some $x in $X, $y in $Y satisfies $x eq $y</pre>
					</ul>
				</ul>
			</slide>
			<slide id="xpath20-comparisons-value">
				<title>Value Comparisons</title>
				<pre>eq ne lt le gt ge</pre>
				<ul>
					<li>These operators have been introduced by XPath 2.0</li>
					<ul>
						<li>they work on single values only</li>
						<li>they should be used except when sequences are allowed as operands</li>
					</ul>
					<li>The value comparison operators also have built-in type conversion rules</li>
					<ul>
						<li>prior to anything else, both operands are <em>atomized</em></li>
						<li>comparing with an empty sequence always yields an empty sequence</li>
						<li>comparing with a sequence with more than one item yields an error</li>
						<li>after that, the values are converted to a <em>common type</em></li>
					</ul>
				</ul>
			</slide>
			<slide id="xpath20-comparisons-node">
				<title>Node Comparisons</title>
				<pre>is &lt;&lt; >></pre>
				<ul>
					<li>Comparing nodes by identity or document order</li>
					<ul>
						<li>node identity is very cumbersome to test in XPath 1.0</li>
						<li>XPath 2.0 makes axis support optional</li>
						<li>some XQuery implementations do not support <xpath>preceding*</xpath> and <xpath>following*</xpath></li>
					</ul>
					<li><q><xpath>$a is $b</xpath></q> is true only if both variables identify the same node</li>
					<ul>
						<li>when processing documents, identity often is more relevant than equality</li>
						<li>much better than XPath 1.0's <q><xpath>generate-id($a) = generate-id($b)</xpath></q></li>
					</ul>
					<li><q><xpath>$a &lt;&lt; $b</xpath></q> is true if $a precedes $b in document order</li>
					<ul>
						<li>precedence (as in the <xpath>preceding</xpath> axis) excludes containment</li>
						<li>if the nodes are in different documents, the result is undefined</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Some Surprises</title>
				<ul>
					<li>Sequences make some things more complicated than atomic values</li>
					<li><q><xpath>$X = $X</xpath></q> is not always true</li>
					<ul>
						<li>if <xpath>$X</xpath> is the empty sequence, there are no equal items</li>
					</ul>
					<li><q><xpath>$X != 'test'</xpath></q> and <q><xpath>not($X = 'test')</xpath></q> are not the same</li>
					<ul>
						<li><q><xpath>$X != 'test'</xpath></q> is true if one item in <xpath>$X</xpath> is not equal to <xpath>'test'</xpath></li>
						<li><q><xpath>not($X = 'test')</xpath></q> is true if no item in <xpath>$X</xpath> is equal to <xpath>'test'</xpath></li>
						<li>the classic case is optional parts: <q><xpath>@mode != 'test'</xpath></q> is <xpath>false</xpath> if there is no <xpath>@mode</xpath>!</li>
						<li>it is generally a good idea to avoid <xpath>!=</xpath> and use <xpath>not()</xpath> and <xpath>=</xpath></li>
					</ul>
					<li><q><xpath>$X = $Y</xpath></q> and <q><xpath>$Y = $Z</xpath></q> does not imply <q><xpath>$X = $Z</xpath></q></li>
					<ul>
						<li>the reason is that comparisons are done pairwise (the comparisons are <em>sets of comparisons</em>)</li>
						<li><q><xpath>(1, 2)</xpath></q>, <q><xpath>(2, 3)</xpath></q>, and <q><xpath>(3, 4)</xpath></q> illustrate this behavior</li>
						<li><xpath>=</xpath> only tests for <q>partial equality</q> (one item must be equal)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Available Datatypes</title>
			<slide>
				<title>XSDL Everywhere</title>
				<ul>
					<li><link href="xsdl-1">XSDL</link> is the foundation for many XML technologies today</li>
					<ul>
						<li>it is a complex standard that few people really understand</li>
						<li>nevertheless, the W3C hardwires it into many new specifications</li>
					</ul>
					<li>XSDL has two parts</li>
					<ul>
						<li>Part 1 defines the structures defining XML documents and schemas as a whole</li>
						<li>Part 2 defines an extensible datatype library based on a set of built-in datatypes</li>
					</ul>
					<li>XSDL does two things</li>
					<ul>
						<li>it defines how documents are validated against a schema by inspecting the document</li>
						<li>it defines how documents are annotated with the results of the validation process</li>
					</ul>
					<li>XDM is based on annotated documents</li>
					<ul>
						<li>documents without annotations are just a special case (everything is untyped)</li>
						<li>XDM has been built around the assumption that people use schemas</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XSDL Type Hierarchy</title>
				<img style="height : 75% ; margin : 2% ; " src="xsd-type-hierarchy.gif" href="http://www.w3.org/TR/xmlschema-2/#built-in-datatypes"/>
			</slide>
			<slide>
				<title>XDM Type Hierarchy</title>
				<img style="width : 86% ; margin : 4% ; " src="xdm-type-hierarchy.png" href="http://www.w3.org/TR/xpath-datamodel/#types-hierarchy"/>
			</slide>
			<slide>
				<title>Simplified XDM Type Hierarchy</title>
				<img style="height : 70% ; margin : 2% ; " src="xdm-type-hierarchy-simplified.jpg" href="http://www.w3.org/TR/xquery/#id-predefined-types"/>
			</slide>
			<slide>
				<title>Atomic Types</title>
				<ul>
					<li>XSDL simple types and XPath's atomic types are similar</li>
					<ul>
						<li>simple types can be atomic, union, or list types</li>
						<li>XPath only considers XSDL's atomic types as its own atomic types</li>
					</ul>
					<li>XPath adds four more types to XSDL's simple types</li>
					<ul>
						<li><xpath>dayTimeDuration</xpath> and <xpath>yearMonthDuration</xpath> for better duration handling</li>
						<li><xpath>anyAtomicType</xpath> and <xpath>untypedAtomic</xpath> for the type hierarchy</li>
					</ul>
					<li>Type-based processing is only available with schema support</li>
					<ul>
						<li>XSLT 2.0 distinguishes <a href="http://www.w3.org/TR/xslt20/#basic-conformance" title="W3C XSLT 2.0 Spec">basic</a> and <a href="http://www.w3.org/TR/xslt20/#schema-aware-conformance" title="W3C XSLT 2.0 Spec">schema-aware</a> XSLT processors</li>
						<li>XQuery 1.0 distinguishes <a href="http://www.w3.org/TR/xquery/#id-minimal-conformance" title="W3C XQuery 1.0 Spec">minimal</a>, <a href="http://www.w3.org/TR/xquery/#id-schema-import-feature" title="W3C XQuery 1.0 Spec">schema importing</a>, and <a href="http://www.w3.org/TR/xquery/#id-schema-validation-feature" title="W3C XQuery 1.0 Spec">schema validating</a> XQuery processors</li>
					</ul>
				</ul>
			</slide>
		</part>
        <part>
			<title>Working with Sequences</title>
			<slide>
				<title>Testing Sequence Cardinality</title>
				<ul>
					<li>Testing for empty sequences</li>
					<pre>empty(()) = true()</pre>
					<li>Testing for non-empty sequences</li>
					<pre>exists((1, 2, 3)) = true()</pre>
					<li>Cleaner code for conditional expressions</li>
					<ul>
						<li>good code should not rely on implicit type conversions</li>
						<pre>if ( exists(@email) ) then …</pre>
						<pre>if ( empty(@email) ) then …</pre>
					</ul>
				</ul>
			</slide>
			<slide>
				<title><q>Set Operations</q> on Sequences</title>
				<ul>
					<li>Merging two node <q>sets</q> (no duplicates, document order)</li>
					<pre>() | ()</pre>
					<li>Intersecting two node <q>sets</q> (no duplicates, document order)</li>
					<pre>() intersect ()</pre>
					<li>Subtracting two node <q>sets</q> (no duplicates, document order)</li>
					<pre>() except ()</pre>
					<li>Comparing sequences item by item for deep equality</li>
					<pre>deep-equal((1, 2, 3), (1, 3, 2)) = false()</pre>
				</ul>
			</slide>
			<slide>
				<title>Manipulating Sequences (I)</title>
				<ul>
					<li>Concatenating sequences</li>
					<pre>((1, 2, 3), (4, 5, 6)) = (1, 2, 3, 4, 5, 6)</pre>
					<li>Reversing sequences</li>
					<pre>reverse((1, 2, 3, 4)) = (4, 3, 2, 1)</pre>
					<li>Finding items in sequences</li>
					<pre>index-of((1, 2, 3, 1), 1) = (1, 4)</pre>
					<li>Cutting sub-sequences out of sequences</li>
					<pre>subsequence((1, 2, 3, 4, 5, 6, 7), 5, 2) = (5, 6)</pre>
				</ul>
			</slide>
			<slide>
				<title>Manipulating Sequences (II)</title>
				<ul>
					<li>Inserting items into sequences</li>
					<pre>insert-before(("one", "two", "four"), 3, "three") = ("one", "two", "three", "four")</pre>
					<li>Removing items from sequences</li>
					<pre>remove(("white", "white", "black", "white"), 3) = ("white", "white", "white")</pre>
					<li>Removing duplicates from a sequence</li>
					<pre>distinct-values((1, 2, 3, 1, 2, 6, 7)) = (1, 2, 3, 6, 7)</pre>
					<li>Help your optimizer!</li>
					<pre>unordered((1, 2, 3, 4, 5)) = (3, 4, 1, 2, 5)</pre>
				</ul>
			</slide>
			<slide>
				<title>Aggregating Sequences</title>
				<ul>
					<li>Counting the number of items in a sequence</li>
					<pre>count((1, 2, 3, 4, 5, 6)) = 6</pre>
					<li>Calculating the average (the types must be compatible)</li>
					<pre>avg((1, 2, 3, 4, 5, 6)) = 3.5</pre>
					<li>Getting maximum or minimum values from a sequence (the types must be compatible)</li>
					<pre>max($seq) ge min($seq)</pre>
					<li>Calculating the sum of sequence items  (the types must be compatible)</li>
					<pre>sum(1 to 42) = 903</pre>
				</ul>
			</slide>
        </part>
        <part>
			<title>Working with Values</title>
			<slide>
				<title>Type Casting</title>
				<ul>
					<li>Values often are untyped</li>
					<ul>
						<li>they may be part of a schema-less document</li>
						<li>they may be extracted as substring of some text value</li>
						<li>XSLT 2.0 allows reading text files (these texts are never typed)</li>
					</ul>
					<li>For intermediate results typed values may be advantageous</li>
					<ul>
						<li>certain operations are only possible on typed values</li>
						<li>code using typed values usually is more robust</li>
					</ul>
					<li>XPath 2.0 has several ways to handle types and instances</li>
					<pre>42 instance of xs:integer</pre>
					<pre>'2007-02-13' castable as xs:date</pre>
					<pre>'2007-02-13' cast as xs:date</pre>
					<pre>if ( $i castable as xs:… ) then $i cast as xs:… else ()</pre>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Advanced Selections</title>
				<ul>
					<li>XPath 2.0 is a powerful language for selection in XML</li>
					<li>XDM provides the sequence model as a foundation</li>
					<li>Functions and operators allow advanced sequence handling</li>
					<li>XPath 2.0 takes some time to get used to</li>
					<li>Problems can be solved in a variety of ways</li>
				</ul>
			</slide>
			<slide>
				<title>Sample XML</title>
				<listing src="dretbiblio.xml" line="3-26"/>
			</slide>
			<slide>
				<title>Questions</title>
				<ul>
					<li>Find all publications published in 2004 (58 / <img style="height : 0.8em" src="question-mark.gif" title="//reference[starts-with(date/@value, '2004')]"/> )</li>
					<ul>
						<li><xpath>date/@value</xpath> can be <code>YYYY[-MM[-DD]]</code> </li>
					</ul>
					<li>Find the last names of all XML-oriented authors (48 / <img style="height : 0.8em" src="question-mark.gif" title="distinct-values(//keywordref[@type eq 'topic-xml']/ancestor::reference//surname)"/> )</li>
					<ul>
						<li><xpath>keywordref/@type</xpath> must be set to <code>topic-xml</code> (ignore the <xpath>@weight</xpath>)</li>
						<li>authors should only be counted once (name clashes are out of scope for this question)</li>
					</ul>
					<li>Find all references where at least two authors have the same given name (8 / <img style="height : 0.8em" src="question-mark.gif" title="//reference[count(descendant::givenname) > count(distinct-values(descendant::givenname))]"/> )</li>
					<ul>
						<li><xpath>descendant::givenname</xpath> is a safe way to find all given names for a reference</li>
						<li>given names cannot be repeated (the schema does not allow repetition)</li>
					</ul>
					<li>Which publications dated 2000 or later have been updated? (48 / <img style="height : 0.8em" src="question-mark.gif" title="for $i in //xref[@type eq 'updates']/@target return //reference[@name eq $i] [date/@value ge '2000']"/> )</li>
					<ul>
						<li><q><xpath>xref[@type eq 'updates']</xpath></q> points to updated references (to their <xpath>@name</xpath>)</li>
						<li>strings can be compared with <link href="xpath20-comparisons-value"/></li>
					</ul>
					<li>What is the average number of authors per publication? (1.79513<span style="text-decoration: overline;">8</span> / <img style="height : 0.8em" src="question-mark.gif" title="avg(for $i in //reference return count($i/names/*))"/> )</li>
					<ul>
						<li><xpath>reference/names</xpath> can have <xpath>name</xpath> or <xpath>person</xpath> children (<img style="height : 0.8em" src="question-mark.gif" title="distinct-values(//reference/names/*/local-name())"/>), count both</li>
					</ul>
				</ul>
			</slide>
        </part>
    </presentation>
	<presentation id="xslt20-1">
		<title short="XSLT 2.0 1">XML Transformations (XSLT) 2.0 – Part I</title>
		<date>2007-10-16</date>
		<toc class="resources"><a href="http://www.w3.org/TR/xslt20/" title="W3C XSLT 2.0 Spec">Spec</a></toc>
		<toc class="abstract">While <em>XML Transformations (XSLT) 1.0</em> has become a successful programming language widely used for transforming XML documents, its limitations sometimes make it difficult to use XSLT in a good way. An important reason for many of the limitations is the fact that XSLT 1.0 has been designed as a client-side language. Building on XSLT 1.0 and XPath 2.0, <em>XML Transformations (XSLT) 2.0</em> improves the language in a variety of ways.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XSLT 1.0 Restrictions</title>
			<ul>
				<li>XSLT as a client-side language</li>
				<ul>
					<li>XSLT 1.0 was designed to run in a browser (similar to CSS)</li>
					<li>XSLT today is almost never used as a client-side language</li>
				</ul>
				<li>Processing  model geared towards client-side usage</li>
				<ul>
					<li>there always is one input document and one output document</li>
					<li>runtime errors have to be avoided as much as possible</li>
				</ul>
				<li>Data types and XML</li>
				<ul>
					<li>XML is a very weakly <q>typed</q> language (strings, IDs, IDREFs)</li>
					<li>any application data types must be implemented in application code</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>XSLT 2.0 Improvements</title>
			<ul>
				<li>XSLT as a server-side language</li>
				<ul>
					<li>XSLT 2.0 better supports server-side usage</li>
					<li>native XSLT support in browsers might never happen reliably</li>
					<li>shipping XML and transforming it in the browser is not required very often</li>
				</ul>
				<li>Processing  model extended to better support server-side usage</li>
				<ul>
					<li>there can be more than one output document</li>
					<li>runtime errors can be a very valuable tool for detecting program errors</li>
				</ul>
				<li><link href="xsdl-1">XSDL</link> introduces a datatype model for XML</li>
				<ul>
					<li><link href="xsdl-simple-types"/> provide a basic vocabulary of datatypes</li>
					<li>many <link href="xpath20">XPath 2.0</link> functions support working with the simple types</li>
					<li><link href="xsdl-complex-types"/> allow the definition of structured types</li>
					<li>type checking is supported for simple and complex types</li>
				</ul>
			</ul>
		</slide>
		<part id="xslt20-result-documents">
			<title>Multiple Result Documents</title>
			<slide>
				<title>One XML, Many HTML</title>
				<ul>
					<li>The original model of XSLT 1.0 was a 1:1 mapping of XML and HTML</li>
					<ul>
						<li>a browser retrieves an XML document and generates HTML from it</li>
						<li>this assumed that the granularity of XML is the same as for HTML</li>
					</ul>
					<li>XML documents often represent complex information</li>
					<ul>
						<li>in many cases this is too much information to be displayed on just one HTML page</li>
						<li>typically the complex model of XML is mapped to interlinked HTML</li>
					</ul>
					<li>HTML generated from XML can reflect many different views</li>
					<ul>
						<li>one HTML for each core concept of the XML information model</li>
						<li>indices that make available other HTML through faceted lists</li>
						<li>table of contents using various concepts for listing entries</li>
						<li>alternative representations for core concepts (various dimensions possible)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Generating HTML Pages</title>
				<listing src="references.xsl" line="4-26"/>
			</slide>
			<part id="xslt-identification">
				<title>Creating Identifiers</title>
				<slide>
					<title>Navigable Hypertext</title>
					<ul>
						<li>Create as many hypertext links as possible</li>
						<ul>
							<li>styling should make sure that hyperlink formatting does not degrade legibility</li>
							<li>use different styles for <em>essential links</em> and <em>ancillary links</em></li>
							<li>ancillary links should not use <a href="http://www.webpagesthatsuck.com/mysterymeatnavigation00.html">mystery meat navigation</a>, but something close</li>
						</ul>
						<li>Purely generated pages get generated names</li>
						<ul>
							<li>table of contents and listings based on various criteria</li>
							<li>index pages for better access to page contents</li>
						</ul>
						<li>Pages representing core concepts should get identifier names</li>
						<ul>
							<li>these identifiers should be stable so that bookmarks do not break</li>
							<li>they can <link href="identifier-reuse">reuse XML identifiers</link>, <link href="identifier-derivation">derive identifiers from content</link>, or <link href="identifier-random">generate random identifiers</link></li>
							<li>a well-defined and stable URI naming policy is important</li>
						</ul>
					</ul>
				</slide>
				<slide id="identifier-reuse">
					<title>Reuse Existing Identifiers</title>
					<ul>
						<li>Many core concepts in XML documents have identifiers</li>
						<ul>
							<li>are these identifiers mandatory?</li>
							<li>are these identifiers a good choice for HTML page names?</li>
							<li>sometimes simple string functions can help to create better identifiers</li>
						</ul>
						<li>URI design is a core part of <a href="../web-fall07/rest">REST</a> and essential to good Web architecture</li>
						<li>True REST design will also allow the creation of new resources</li>
						<ul>
							<li>it is possible to <http>PUT</http> new resources into existing collections</li>
							<li>it is possible to <http>POST</http> new resources to existing collections</li>
							<li><http>PUT</http> and <http>POST</http> are different with regard to the resource name</li>
						</ul>
						<li>These identifiers are a core part of the application data model</li>
					</ul>
				</slide>
				<slide id="identifier-derivation">
					<title>Generate Content-Based Identifiers</title>
					<ul>
						<li>Sometimes more <q>speaking</q> identifiers are required</li>
						<ul>
							<li>easier to understand when looking at the identifier and the URI</li>
							<li>often there is a danger of name clashes</li>
						</ul>
						<li>Blogs often use a combination of the post date and the title</li>
						<ul>
							<li>dates should appear as hierarchical path segments such as <code>2007/10/25</code></li>
							<li>titles are appended by matching the post title to URI syntax (replace and truncate)</li>
							<li>name clashes can only occur on the same day using a very similar title</li>
							<li>date navigation can be used to provide access to date-based index pages</li>
						</ul>
						<li>Generated identifiers should be stable (name clashes should not break them)</li>
					</ul>
				</slide>
				<slide id="identifier-random">
					<title>Generate Random Identifiers</title>
					<ul>
						<li>Sometimes it may not be required or possible to reuse data for identifiers</li>
						<ul>
							<li>this may be true if there is no identifier and no <q>main property</q></li>
							<li>generated identifiers can be designed to be very compact</li>
						</ul>
						<li>Random identifiers should use some pseudo-random algorithm</li>
						<ul>
							<li>one possible solution is a fingerprint algorithm such as <a href="http://www.miraclesalad.com/webtools/md5.php">MD5</a></li>
							<li>another solution is a really random solution such as <a href="http://tinyurl.com/">TinyURL</a></li>
						</ul>
						<li>It is necessary to keep track of the generated identifiers</li>
						<ul>
							<li>collisions are possible (in particular in case of short random values)</li>
							<li>in case of a collision an alternative identifier must be assigned</li>
						</ul>
					</ul>
				</slide>
			<slide>
				<title>Using Existing Identifiers</title>
				<listing src="references.xsl" line="27-47"/>
			</slide>
			</part>
		</part>
		<part id="xslt20-text-processing">
			<title>Text Processing</title>
			<slide>
				<title>Text Processing in XSLT 1.0</title>
				<ul>
					<li>XPath 1.0 provides a small number of <a href="http://www.w3.org/TR/xpath#section-String-Functions">string functions</a></li>
					<ul>
						<li>the selection of functions is very limited and sometimes restrictive</li>
						<li>more advanced functionality is not available (in particular, no <link href="xslt20-regex"/>)</li>
					</ul>
					<li>Text documents cannot be processed at all in XSLT 1.0</li>
					<ul>
						<li>XSLT 1.0 assumes that valuable input data always is XML</li>
						<li>text is a straightforward extension of the XSLT processing model</li>
						<li>binary data access would require a much bigger change of the language</li>
					</ul>
					<li>XSLT 2.0 extends XSLT to support import <em>and</em> export</li>
					<ul>
						<li>XSLT 1.0 already supports text documents as an output format</li>
						<li>XSLT 2.0 now supports text documents as an input format</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Accessing Text Documents</title>
				<slide>
					<title>Non-XML in an XML World</title>
					<ul>
						<li>Many tools produce text-based output</li>
						<ul>
							<li>text structures are much simpler and often lossy</li>
							<li>at least <em>some</em> data can be used and reused</li>
						</ul>
						<li><xpath>unparsed-text()</xpath> reads a text-based document</li>
						<ul>
							<li>returns a string containing the complete input document</li>
							<li>optionally, an <xpath>encoding</xpath> can be specified (UTF-8 is the default)</li>
						</ul>
						<li>Text documents often also are <q>structured documents</q></li>
						<ul>
							<li>text uses sentences and paragraphs (empty lines) and maybe other <q>markup</q></li>
							<li>text formats often use commas or semicolons or spaces or tabs for structures</li>
							<li>XSLT 2.0's <link href="xslt20-text-transformation">text transformation features</link> support working with these structures</li>
						</ul>
					</ul>
				</slide>
				<slide id="csv">
					<title short="CSV">Comma-Separated Values (CSV)</title>
					<ul>
						<li><a href="http://dret.net/rfc-index/reference/RFC4180">RFC 4180</a> defines a textual format for <q>spreadsheet data</q></li>
						<li>CSV has been used for a long time, but some of the details were solved differently</li>
						<li>Defining a media type makes it easier for implementations to know what to expect</li>
						<ul>
							<li>the CSV registration not only registers the type, but also defines it</li>
						</ul>
						<li>CSV is not overly complex, but some issues have to be solved</li>
						<ul>
							<li>how to separate lines (CRLF)</li>
							<li>how to end the file (CRLF is allowed but optional)</li>
							<li>are headers allowed (yes, but they are not marked as such)</li>
							<li>may different lines use different numbers of fields (no)</li>
							<li>are spaces significant (yes)</li>
							<li>are quotes significant (no, they are delimiters, so quotes as values must be escaped)</li>
							<li>how to treat fields with CRLF, commas, or quotes (enclose the value in quotes)</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xslt20-text-transformation">
				<title>Transforming Text</title>
				<slide id="xslt20-regex">
					<title>Regular Expressions</title>
					<ul>
						<li><link href="xpath">XPath</link> is great for navigating through an XML tree</li>
						<ul>
							<li>all relevant structures of XML are represented and can be navigated</li>
							<li><link href="xpath">XPath 1.0</link> has some <a href="http://www.w3.org/TR/xpath#section-String-Functions">very basic string functions</a> to work with content</li>
							<li><link href="xpath20">XPath 2.0</link> adds <a href="http://www.w3.org/TR/xpath-functions/#string-functions">many more string functions</a></li>
						</ul>
						<li>XPath 2.0 extends the <a href="http://www.w3.org/TR/xmlschema-2/#regexs">regular expression syntax</a> of <link href="xsdl-1">XSDL</link></li>
						<ul>
							<li>the usual basic expressions known from many languages and tools</li>
							<li><xpath>^</xpath> and <xpath>$</xpath> for matching beginnings and ends (of strings or lines)</li>
							<li>XPath 2.0 supports <em>reluctant quantifiers</em> (indicated by a <xpath>?</xpath> following a quantifier)</li>
							<li>allows access to sub-expressions (important for selective <xpath>replace()</xpath> of substrings)</li>
							<li>allows back-references within expressions (references captured substrings)</li>
						</ul>
						<li>XPath 2.0 supports regular expressions in three functions</li>
						<ul>
							<li>XSLT 2.0 adds an instruction for <link href="xslt20-analyze-string">parsing strings</link></li>
						</ul>
					</ul>
				</slide>
				<slide id="xslt20-match-replace">
					<title>Matching &amp; Replacing</title>
					<ul>
						<li><xpath>matches()</xpath> tests whether a string matches a given pattern</li>
						<ul>
							<li>an optional <em>flag</em> allows <a href="http://www.w3.org/TR/xpath-functions/#flags">different processing options</a></li>
							<pre>matches("abracadabra", "bra")    eq true()</pre>
							<pre>matches("abracadabra", "^a.*a$") eq true()</pre>
							<pre>matches("abracadabra", "^bra")   eq false()</pre>
						</ul>
						<li><xpath>replace()</xpath> selectively replaces parts of the input string</li>
						<ul>
							<li>supports the same flag as the <xpath>matches()</xpath> function</li>
							<pre>replace("abracadabra", "bra", "*")              eq "a*cada*"</pre>
							<pre>replace("abracadabra", "a.*a", "*")             eq "*"</pre>
							<pre>replace("abracadabra", "a.*?a", "*")            eq "*c*bra"</pre>
							<pre>replace("abracadabra", "a(.)", "a$1$1")         eq "abbraccaddabbra"</pre>
							<pre>replace("abracadabra", "^(.*?)b(.*)$", "$1c$2") eq "acracadabra"</pre>
						</ul>
					</ul>
				</slide>
				<slide id="xslt20-tokenizing">
					<title>Tokenizing</title>
					<ul>
						<li><xpath>tokenize()</xpath> turns a string into a sequence of strings</li>
						<ul>
							<li>supports the transition from <em>text structures</em> to the XDM concept of sequences</li>
							<li>supports the same flag as the <xpath>matches()</xpath> and <xpath>replace()</xpath> functions</li>
						</ul>
						<li>Tokenization is based on the concept of pattern-based structures</li>
						<ul>
							<li>input strings are using some recognizable way of separating substrings</li>
							<li>a pattern can be used to find substrings and return them as a sequence</li>
							<pre>tokenize("just plain  text", "\s+") eq ( "just", "plain", "text" )</pre>
							<pre>tokenize("1,15,,24,50,", ",")       eq ( "1", "15", "", "24", "50", "" )</pre>
							<pre>tokenize("HTML &lt;BR> tag&lt;br />soup", "\s*&lt;br\s*/?>\s*", "i") eq ("HTML", "tag", "soup")</pre>
						</ul>
					</ul>
				</slide>
				<slide id="xslt20-analyze-string">
					<title>Analyzing Strings</title>
					<ul>
						<li>XPath functions work on a string and return strings or sequences</li>
						<li><xslte>analyze-string</xslte> executes XSLT code for parts of the string</li>
						<ul>
							<li>XSLT code can create elements and/or attributes based on string input</li>
							<li>transforming text into XML often is referred to as <em>up-conversion</em></li>
						</ul>
						<li>Two children contain code for handling the parsing process</li>
						<ul>
							<li><xslte>matching-substring</xslte> is executed for each matching part</li>
							<li><xslte>non-matching-substring</xslte> is executed for each non-matching part</li>
							<li>both of these elements are optional</li>
							<li>if two adjacent matching substrings are found, <xslte>matching-substring</xslte> is called twice</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Replacing Characters with Elements</title>
					<ul>
						<li>Replace all newline characters in the abstract element by <elem>br/</elem> elements</li>
					</ul>
					<pre><![CDATA[<xsl:analyze-string select="abstract" regex="\n">
  <xsl:matching-substring>
    <br/>
  </xsl:matching-substring>
  <xsl:non-matching-substring>
    <xsl:value-of select="."/>
  </xsl:non-matching-substring>
</xsl:analyze-string>]]></pre>
				</slide>
				<slide>
					<title>Replacing <q>Character Markup</q></title>
					<ul>
						<li>Turn textual conventions into XML markup</li>
						<ul>
							<li>citations are using <q>[…]</q> for the citation identification</li>
						</ul>
					</ul>
					<pre><![CDATA[<xsl:analyze-string select="body" regex="\[(.*?)\]">
  <xsl:matching-substring>
    <cite><xsl:value-of select="regex-group(1)"/></cite>
  </xsl:matching-substring>
  <xsl:non-matching-substring>
    <xsl:value-of select="."/>
  </xsl:non-matching-substring>
</xsl:analyze-string>]]></pre>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>A Better XSLT</title>
				<ul>
					<li>Multiple result documents can generate Web sites from one XML document</li>
					<li>Highly interlinked hypertext can be produced by adding HTML links</li>
					<li>Text processing opens a new possibility for XSLT processing</li>
					<li>Regular expression support allows flexible processing of text documents</li>
					<li>XPath 2.0 and XSLT 2.0 support pattern-based text processing</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt20-2">
		<title short="XSLT 2.0 2">XML Transformations (XSLT) 2.0 – Part II</title>
		<date>2007-10-18</date>
		<toc class="resources"><a href="http://www.oreillynet.com/xml/blog/2007/03/reevaluating_xslt_20.html" title="Kurt Cagle O'Reilly Blog">Reevaluating XSLT 2.0</a></toc>
		<toc class="abstract">Many of the new features of XSLT 2.0 have their roots in XPath 2.0 and the underlying new data model of sequences. But some features of XSLT 2.0 really are part of the language itself, such as support for <em>user-defined functions</em>, and the ability to <em>group items</em> and then iterate over these groups. In addition, XSLT now can be used as a <em>typed programming language</em>, which consumes and produces <em>typed trees</em> instead of just well-formed XML trees.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="xslt20-functions">
			<title>User-Defined Functions</title>
			<slide>
				<title>Reusable Code in XPaths</title>
				<ul>
					<li>XSLT 1.0 allows code reuse based on <em>templates</em></li>
					<ul>
						<li><link href="xslt-named-templates"/> can be called by name like subroutines</li>
						<li>they can only be called in XSLT code, not in an XPath</li>
					</ul>
					<li>User-defined functions in XPaths support modular code</li>
					<ul>
						<li>any code that produces a sequence of items</li>
						<li>input parameters can be specified (as for templates and stylesheets)</li>
						<li>the function is called using regular XPath function call syntax</li>
					</ul>
					<li>User-defined functions always must use a namespace</li>
					<ul>
						<li>the function definition must use an existing namespace prefix</li>
						<li>the function call must use the same namespace for the function name</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Function Definition and Call</title>
				<listing src="references.xsl" line="55-58"/>
				<listing src="references.xsl" line="11-23"/>
			</slide>
		</part>
		<part id="xslt20-grouping">
			<title>Grouping</title>
			<slide>
				<title>Aggregating Items</title>
				<ul>
					<li>Lists of items often have to be grouped by some value</li>
					<ul>
						<li>addresses may be grouped by initial letter or city or ZIP code</li>
						<li>sports events may be grouped by location or team or season</li>
						<li>restaurants may be grouped by cuisine or location or quality</li>
					</ul>
					<li>Grouping needs to <em>identify</em> and <em>build</em> groups</li>
					<ul>
						<li>for small datasets, implementation issues are not really relevant</li>
						<li>for large datasets, performance is greatly affected by the implementation</li>
					</ul>
					<li>XSLT 1.0 has no support for grouping</li>
					<ul>
						<li>XPath axes can be used to find the first member of each group</li>
						<pre>contact[not(surname = preceding-sibling::contact/surname)]</pre>
						<li>this expression can become very expensive to evaluate for large datasets</li>
						<li><link href="muenchian-grouping"/> is the best that can be done in XSLT 1.0</li>
					</ul>
				</ul>
			</slide>
			<slide id="muenchian-grouping">
				<title>Muenchian Grouping</title>
				<listing src="muenchian-data.xml" line="2-14" href="http://www.jenitennison.com/xslt/grouping/muenchian.html"/>
				<listing src="muenchian-code.xsl" line="3-18" href="http://www.jenitennison.com/xslt/grouping/muenchian.html"/>
			</slide>
			<slide>
				<title>Grouping and Iterating over Groups</title>
				<listing src="references.xsl" line="27-47"/>
			</slide>
		</part>
		<part id="xslt20-typing">
			<title>Typed Programming</title>
			<slide>
				<title>XDM and XPath 2.0 Support Types</title>
				<ul>
					<li>Useful for predefined types such as <xsdtype>date</xsdtype> and <xsdtype>dateTime</xsdtype></li>
					<li>When working with DTDs, values must be explicitly cast</li>
					<li>When working with <link href="xsdl-1">XSDL</link>, types are inferred from the schema</li>
					<ul>
						<li>documents are validated before they are processed</li>
						<li>validation turns the XML tree into a type-annotated tree</li>
						<li>XSLT 2.0 has access to the type information and treats nodes as typed</li>
					</ul>
					<li>Schema-aware XSLT allows type control for output documents</li>
					<ul>
						<li><xslte>result-document</xslte> and <xslte>document</xslte> create new document trees</li>
						<li><xslta>validation</xslta> and <xslta>type</xslta> control validation for these trees</li>
						<li>only schema-aware XSLT 2.0 processors support these attributes</li>
					</ul>
				</ul>
			</slide>
			<part id="xslt20-simple-types">
				<title>Typed Nodes</title>
				<slide>
					<title>XSDL Simple Types</title>
					<ul>
						<li><link href="xsdl-simple-types"/> are used to type non-markup values</li>
						<ul>
							<li>mostly <em>atomic types</em> which describe the content of an element or attribute</li>
							<li><em>union types</em> and <em>list types</em> describe alternative or repeatable types</li>
						</ul>
						<li>Working with simple types is supported in any XSLT 2.0 processor</li>
						<ul>
							<li>the basic XSDL datatypes are part of <link href="xdm">XDM</link> and always available</li>
							<pre>xs:date( if ( $i castable as xs:date ) then $i else '2000-01-01' )</pre>
						</ul>
						<li>Working with user-defined simple types is a different story</li>
						<ul>
							<li><link href="xsdl-simple-type-restriction"/> is used to derive new simple types</li>
							<li>users can build their own specialized type library</li>
							<li>such a library is a schema and requires a schema-aware XSLT 2.0 processor</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xslt20-complex-types">
				<title>Typed Trees</title>
				<slide>
					<title>XSDL Complex Types</title>
					<ul>
						<li><link href="xsdl-complex-types"/> are used to type markup structures</li>
						<ul>
							<li>they are always associated with element nodes</li>
							<li>they describe the content model and allowed attributes for that type</li>
						</ul>
						<li>Working with complex types very much depends on the schema design</li>
						<ul>
							<li>if the schema tries to reflect the model in the types, it may be useful</li>
							<li>if the schema has been generated by some tool, types are not very relevant</li>
						</ul>
						<li>A more interesting idea would be to actually inspect a type</li>
						<ul>
							<li>upon finding a type, find out which attributes are optional and which are required</li>
							<li>this would require access not only to the <em>type name</em>, but to the <em>type definition</em></li>
							<li>this is a very interesting research issue, but not yet possible in XSLT 2.0</li>
							<li>the <a href="http://dret.net/netdret/publications#wil07e">Schema Component XML Syntax (SCX)</a> is an attempt to solve this problem</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Consider Upgrading</title>
				<ul>
					<li>Going from XSLT 1.0 to 2.0 supports more data types</li>
					<li>Going to schema-aware XSLT 2.0 supports typed documents</li>
					<li>For serious programming, type checking is very useful</li>
					<li>Start using XSLT 2.0 as soon as possible</li>
					<li>Start using schema-aware XSLT 2.0 as soon as required</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsdl-1">
		<title short="XSDL 1">XSDL – Part I</title>
		<date>2007-10-23</date>
		<toc class="resources"><a href="xsdl-quickref.pdf">XSDL QuickRef</a>&#160;· <a href="http://www.w3.org/XML/Schema" title="W3C XML Schema Home">XML Schema</a></toc>
		<toc class="abstract">The <em>XML Schema Definition Language (XSDL)</em> is the most popular schema language for XML today. It has been introduced to overcome some of the commonly observed limitations of DTDs, most notably the lack of typing. <em>Simple Types</em> describe content which is not structured by XML markup, which means they describe attribute values and element content. Simple types can be defined by deriving new types from existing types by using type restriction.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Bad Names</title>
			<blockquote>XML Schema is a language for describing an XML schema.<br/>An XML schema can be defined using XML Schema.<br/>I would like to use XML Schema for my XML schema.</blockquote>
			<ul>
				<li>The two most awkward name choices in the XML arena:</li>
				<ol>
					<li><em>XML Schema</em>, which is simply <u>an</u> XML schema language (among many others)</li>
					<li><em>Open XML</em>, which is simply an XML language for encoding office documents</li>
				</ol>
				<li>Naming things means <q>getting into people's heads</q></li>
				<ul>
					<li>pretentious and all-embracing name choices serve a certain purpose</li>
					<li><em>XSD</em> and <em>WXS</em> are two semi-official acronyms for XML Schema</li>
				</ul>
				<li>Good news: <em>XML Schema</em> <a href="http://dret.typepad.com/dretblog/2007/08/xml-schema-11-x.html">now has been renamed</a></li>
				<ul>
					<li><em>XSDL</em> now is the official acronym for the <em>XML Schema Definition Language</em></li>
					<li>(very officially, this name change affects version 1.1 of the language only …)</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>What's Wrong With DTDs?</title>
			<ul>
				<li>DTDs do not support application-level datatypes</li>
				<ul>
					<li>XML for B2B is very data-centric and needs typing</li>
					<li>SGML was created for documents where typing was less important</li>
				</ul>
				<li>DTDs do not support any relationships between markup constructs</li>
				<ul>
					<li>content models cannot be reused</li>
					<li>attribute lists cannot be reused</li>
					<li>structural relationships cannot be exploited in the DTD</li>
					<li><link href="param-entity"/> are used as a hack to work around this limitation</li>
				</ul>
				<li>DTD + XML Namespaces = Bad idea!</li>
			</ul>
		</slide>
		<slide>
			<title>Different Levels of Semantics</title>
			<ul>
				<li>XSDL's simple data types provide some semantics</li>
				<ul>
					<li>a formerly undescribed attribute can now be described as being a <xsdtype>date</xsdtype></li>
					<li>it can be understood as being a date and inserted into a calendar</li>
					<li>but what kind of date is it? a birthday? an order date? a shipping date?</li>
					<li>a question of the <em>context</em> of where the <xsdtype>date</xsdtype> appears</li>
				</ul>
				<li>XSDL better supports model-level information</li>
				<ul>
					<li>however, XSDL also only captures part of the application semantics</li>
					<li>XSDL is usually more specific than a DTD, because it contains types</li>
					<li>types provide information about the basic datatypes being used</li>
					<li>additional semantics (e.g., different kinds of dates) must be documented elsewhere</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Schema-Validation and Applications</title>
			<img src="schema-valid-documents.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<slide>
			<title>Validation and Typing</title>
			<ul>
				<li>XSDL does two things at the same time:</li>
			</ul>
			<ol>
				<li>Validation checks for structural integrity (is the document <em>schema-valid</em>?)</li>
				<ul>
					<li>checking elements and attributes for proper usage (as with DTDs)</li>
					<li>checking element contents and attribute values for proper values</li>
				</ul>
				<li>Type annotations make the types available to applications</li>
				<ul>
					<li>instead of having to look at the schema, applications get the <em>Post-Schema Validation Infoset (PSVI)</em></li>
					<li>type-based applications (such as XSLT 2.0) can work on the typed instance</li>
				</ul>
			</ol>
		</slide>
		<slide>
			<title>XSDL Syntax</title>
			<img src="xml-technology-syntaxes.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<part id="xsdl-types">
			<title>XSDL Types</title>
			<slide>
				<title>What is a Type?</title>
				<ul>
					<li>A type is a <em>set of values</em></li>
					<ul>
						<li>the values can be enumerated (<em>home, mobile, office</em>)</li>
						<li>the values can be described by extension (intervals, regular expressions)</li>
					</ul>
					<li>DTDs have (almost) no types</li>
					<ul>
						<li>element content is always <xml>#PCDATA</xml> (any number of any characters)</li>
						<li>attributes most often are <xml>CDATA</xml>  (any number of any characters)</li>
						<li>attributes may have enumerated types (but no extensional types)</li>
						<li>attributes may use <link href="ididref"/></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XSDL vs. DTD</title>
				<div style="margin : 2% ; ">
					<table width="90%" cellspacing="20">
						<col/>
						<colgroup span="2"/>
						<thead>
							<tr>
								<td/>
								<th>DTD</th>
								<th>XSDL</th>
							</tr>
						</thead>
						<tbody>
							<tr>
								<th>Concepts</th>
								<td colspan="2" align="center">some conceptual model (formal/informal)</td>
							</tr>
							<tr>
								<th>Types</th>
								<td style="color : gray ; "><xml>ID/IDREF</xml> and (<xml>#P</xml>)<xml>CDATA</xml></td>
								<td>Hierarchy of Simple and Complex Types</td>
							</tr>
							<tr>
								<th>Markup Constructs</th>
								<td>Element Type Declarations<br/><xml>&lt;!ELEMENT order …</xml></td>
								<td>Element Definitions<br/><xml>&lt;xs:element name="order"> …</xml></td>
							</tr>
							<tr>
								<th>Instances (Documents)</th>
								<td colspan="2" align="center"><xml>&lt;order date=""> [ order content ] &lt;/order></xml></td>
							</tr>
						</tbody>
					</table>
				</div>
			</slide>
			<slide>
				<title>Document/Data Perspectives</title>
				<ul>
					<li>XML as documents is text interspersed with structure</li>
					<ul>
						<li>XML captures text structures that support document processing</li>
						<li>without these structures, the text remains usable (as unstructured text)</li>
						<li>structure is good, but not indispensable</li>
					</ul>
					<li>XML as data is structure filled with data</li>
					<ul>
						<li>programmers think about classes and objects, so they need types</li>
						<li>without structure, data-centric XML is completely useless</li>
						<li>programmers often view XML as a wire format and types as the portal to their objects</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xsdl-simple-types">
			<title>Simple Types</title>
			<slide>
				<title>What are Simple Types?</title>
				<ul>
					<li>Simple types describe values not structured by XML markup</li>
					<ul>
						<li>they describe attribute values (<xml>date="2006-10-03"</xml>)</li>
						<li>they describe element content (<code>&lt;phone>+1-510-6432253&lt;/phone></code>)</li>
					</ul>
					<li>Simple types can be used for elements or attributes</li>
					<ul>
						<li>XSDL treats contents in elements and attributes equally</li>
						<li>simple type libraries can be designed independent of their eventual use</li>
					</ul>
					<li>Simple types are available in three flavors</li>
					<ul>
						<li><em>atomic types:</em> one value of one type (one number in some range)</li>
						<li><em>union types:</em> one value of a union of types (a number or the string <q><code>undefined</code></q>)</li>
						<li><em>list types:</em> a whitespace-separated list of values (<elem>phone type="home office"</elem>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Named vs. Anonymous</title>
				<ul>
					<li>Types can be <em>named</em> or <em>anonymous</em></li>
					<ul>
						<li>named types have a name and can be referenced (and thus be reused)</li>
						<li>anonymous types have no name and can only be used where they are defined</li>
					</ul>
				</ul>
				<listing src="named-anonymous-simple.xsd" line="3-9"/>
				<listing src="named-anonymous-simple.xsd" line="17-23"/>
			</slide>
			<slide>
				<title>Type Definitions</title>
				<ul>
					<li>Simple types are sets of values</li>
					<ul>
						<li>named simple types are sets of values with a name (and thus reusable)</li>
						<li>anonymous simple types are sets of values defined where they are needed</li>
					</ul>
					<li>Simple types are defined to represent model-level information</li>
					<ul>
						<li>in most cases, they will have restrictions associated with them</li>
						<li>they may also simply be tags for semantics (fax and phone numbers share the same value space)</li>
					</ul>
					<li>XSDL has a library of <em>built-in datatypes</em></li>
					<ul>
						<li><em>ur-types</em> are the conceptual grounding of all types</li>
						<li><em>primitive types</em> are the types that are there <q>by definition</q></li>
						<li><em>derived types</em> are based on primitive types</li>
						<li>users can derive their own types using <link href="xsdl-simple-type-restriction"/></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Type Hierarchy</title>
				<img style="height : 75% ; margin : 2% ; " src="xsd-type-hierarchy.gif" href="http://www.w3.org/TR/xmlschema-2/#built-in-datatypes"/>
			</slide>
			<part id="xsdl-simple-type-restriction">
				<title>Simple Type Restriction</title>
				<slide>
					<title>Built-In Types</title>
					<listing src="built-in.xsd"/>
				</slide>
				<slide>
					<title>How to Restrict</title>
					<ul>
						<li>Simple types can be derived by restriction</li>
						<ul>
							<li>the <em>base type</em> must be a simple type</li>
							<li>the <em>derived type</em> will be a simple type</li>
							<li>all simple types form a tree, rooted at the <code>anySimpleType</code></li>
						</ul>
						<li>Restrictions are based on facets</li>
						<ul>
							<li>each restriction can use <em>0-n</em> facets</li>
							<li>facets can be refined in further simple type restrictions</li>
							<li>XSDL designers should try to restrict types as much as possible</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Facets</title>
					<ul>
						<li>Facets define a certain way of restricting a simple type</li>
						<ul>
							<li>facets are independent, but they may interact (<code>minLength</code> and <code>maxLength</code>)</li>
							<li>XSDL defines 12 <em>constraining facets</em> which may be used for restrictions</li>
							<li><code>length, minLength, maxLength, pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minExclusive, minInclusive, totalDigits, fractionDigits</code></li>
						</ul>
						<li>Facets may be repeated in different levels of the type hierarchy</li>
						<ul>
							<li>they may only further restrict the facet (e.g., reducing the <code>maxLength</code>)</li>
							<li>facets apply to all directly or indirectly derived subtypes</li>
							<li>facets may be <em>fixed</em> (no further restriction is allowed)</li>
						</ul>
						<li>Not all facets are applicable to all types</li>
						<ul>
							<li>the applicability depends on the <em>primitive type</em> being used</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Facet Applicability</title>
					<div style="margin : 2% ; ">
						<table width="90%">
							<tr>
								<th align="right" valign="top"><code>string</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>boolean</code></th>
								<td>pattern, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>float</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>double</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>decimal</code></th>
								<td>totalDigits, fractionDigits, pattern, whiteSpace, enumeration, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>duration</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>dateTime</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>time</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>date</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gYearMonth</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gYear</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gMonthDay</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gDay</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gMonth</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>hexBinary</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>base64Binary</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>anyURI</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>QName</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>NOTATION</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
						</table>
					</div>
				</slide>
				<slide>
					<title>Patterns</title>
					<ul>
						<li>Patterns restrict the <em>lexical space</em> of simple types</li>
						<ul>
							<li>most other facets restrict the <em>value space</em> (e.g., intervals of numbers)</li>
							<li>in many cases, patterns are useful additions to value-oriented facets</li>
						</ul>
						<li>Patterns are <a href="http://www.w3.org/TR/xmlschema-2/#regexs">regular expressions</a></li>
						<ul>
							<li>they support many common regex constructs and Unicode</li>
							<li>the language pattern allows <q><code>de</code></q>, <q><code>de-CH</code></q>, and other tags</li>
							<li>the pattern checks for lexical correctness, not against a code list</li>
						</ul>
					</ul>
					<pre>([a-zA-Z]{2}|[iI]-[a-zA-Z]+|[xX]-[a-zA-Z]{1,8})(-[a-zA-Z]{1,8})*</pre>
				</slide>
				<slide>
					<title>Simple Type Examples</title>
					<listing src="simple-examples.xsd"/>
				</slide>
				<slide>
					<title>Facet Limitations</title>
					<ul>
						<li>Facets limit one dimension of a type's value space</li>
						<ul>
							<li>using <code>pattern</code>, the lexical space can also be restricted</li>
							<li>restrictions should be made as specific as possible</li>
							<li>no limitations are possible beyond the predefined facets</li>
						</ul>
						<li>There is no connection to the context within the document</li>
						<ul>
							<li>facets cannot make references to other values (e.g., neighboring attributes)</li>
						</ul>
						<li>Additional constraints should be documented</li>
						<ul>
							<li>documentation enables applications to implement constraint checking</li>
							<li>other schema languages (like <link href="schematron"/>) may be used to express these constraints</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Typed XML Content</title>
				<ul>
					<li>XSDL introduces a <q>type layer</q> to schema languages</li>
					<li>Types facilitate abstractions (and thus modeling)</li>
					<li>Simple types can be restricted to yield more specific types</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsdl-2">
		<title short="XSDL 2">XSDL – Part II</title>
		<date>2007-10-25</date>
		<toc class="resources"></toc>
		<toc class="abstract">XSDL <em>Complex Types</em> describe element content if this content is using attributes and/or element content other than only character data. Thus, complex types are used to define the allowed markup structures for a class of documents. Using XSDL's type concepts, it is easier to represent model-level information in a schema, because type hierarchies can represent model-level specializations.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="xsdl-complex-types">
			<title>Complex Types</title>
			<slide>
				<title>What is a Complex Type?</title>
				<ul>
					<li>Complex types describe the allowed element content</li>
					<ul>
						<li>they describe what the element may contain (the element's <em>content model</em>)</li>
						<li>they describe the attributes that an element may have (the element's <em>attribute list</em>)</li>
					</ul>
					<li>Complex types do not define the element name</li>
					<ul>
						<li>the complex type defines which content is allowed for the element</li>
						<li>the element definition uses the complex type to define the allowed element content</li>
					</ul>
					<li>Complex types have similar properties to simple types</li>
					<ul>
						<li>they can be named or anonymous</li>
						<li><link href="xsdl-complex-type-derivation"/> can be used to construct a type hierarchy</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Complex Type Example</title>
				<listing src="complex-example.xsd"/>
			</slide>
			<slide>
				<title>Complex Types &amp; Content Types</title>
				<ul>
					<li>Complex types can have different kinds of content</li>
					<ul>
						<li><em>simple content</em> refers to simple type content using additional attributes</li>
						<li><em>complex content</em> is anything else (anything beyond <em>simple type content</em>)</li>
					</ul>
					<li><link href="xsdl-complex-type-derivation"/> heavily depends on this classification</li>
				</ul>
				<div style="margin : 2% ; ">
					<table width="90%" cellspacing="20" rules="all" frame="border">
						<tr>
							<th rowspan="3">Simple Types</th>
							<th colspan="4">Complex Types</th>
						</tr>
						<tr>
							<th rowspan="2">Simple Content</th>
							<th colspan="3">Complex Content</th>
						</tr>
						<tr>
							<td align="center">Element Only</td>
							<td align="center">Mixed</td>
							<td align="center">Empty</td>
						</tr>
					</table>
				</div>
			</slide>
			<part>
				<title>Content Models</title>
				<slide>
					<title>DTD Content Models</title>
					<ul>
						<li><link href="dtd-element"/> in DTDs uses a compact syntax</li>
						<ul>
							<li>XSDL supports the same facilities with a more verbose syntax</li>
							<li>XSDL adds features which DTDs do not support</li>
						</ul>
						<li>DTDs allow elements to be mandatory, optional, repeatable, or optional and repeatable</li>						
						<ul>
							<li>XSDL allows the cardinality to be specified</li>
						</ul>
						<li>DTDs allow sequences (<q><code>,</code></q>) and alternatives (<q><code>|</code></q>)</li>
						<ul>
							<li>XSDL introduces a (very limited) operator for <em>all groups</em></li>
						</ul>
						<li>Apart from the syntax, XSDL content models are not very different</li>
					</ul>
				</slide>
				<slide>
					<title>Mixed Content</title>
					<ul>
						<li>DTDs define mixed content by mixing <code>#PCDATA</code> into the content model</li>
						<ul>
							<li>DTDs always require mixed content to use the form <code>( #PCDATA | a | b )*</code></li>
							<li>the occurrence of elements in mixed content cannot be controlled</li>
						</ul>
						<li>XSDL defines mixed content outside of the content model</li>
						<ul>
							<li>the content model is defined like an element-only content model</li>
							<li>the <code>mixed</code> attribute on the type marks the type as being mixed</li>
						</ul>
						<li>XSDL mixed content can use all model groups</li>
						<ul>
							<li>it is possible to constrain element occurrences in the same way as in element-only content</li>
							<li>in practice, this feature is rarely used (mixed content often is very loosely defined)</li>
						</ul>
					</ul>
					<listing src="global-local.xsd" line="3-11"/>
				</slide>
				<slide>
					<title>Empty Content</title>
					<ul>
						<li>DTDs have a special keyword for empty elements</li>
						<ul>
							<li>instead of the content model, the keyword <code>EMPTY</code> is used</li>
							<li>empty elements may still have attribute lists associated with them</li>
						</ul>
						<li>XSDL empty types are defined implicitly</li>
						<ul>
							<li>there is no explicit keyword for defining an empty type</li>
							<li>if a type has no model group inside it, it is empty (it still may have attributes)</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>XSDL Design Patterns</title>
			<slide>
				<title>Global Elements? Named Types?</title>
				<ul>
					<li>XSDL's design dimensions allow different strategies</li>
					<li>Elements/attributes can be defined as <em>local</em> or <em>global</em></li>
					<ul>
						<li><em>local</em> elements/attributes are defined where they are used</li>
						<li><em>global</em> elements/attributes are globally defined and then referenced</li>
						<li>elements/attributes which should be reused must be defined as global</li>
					</ul>
					<li>Types can be defined as <em>anonymous</em> or <em>named</em></li>
					<ul>
						<li><em>anonymous</em> types are defined where they are used</li>
						<li><em>named</em> types are globally defined and then referenced</li>
						<li>types which should be reused must be defined as named</li>
					</ul>
					<li>XSDL design patterns can favor one type of definition</li>
					<ul>
						<li>reuse always requires <em>global</em> elements/attributes and <em>named</em> types</li>
						<li>having everything <em>global/named</em> results in bigger schemas</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Sample XML</title>
				<listing src="design-sample.xml"/>
			</slide>
			<part>
				<title>Russian Doll</title>
				<slide>
					<title>Russian Doll XSDL</title>
					<listing src="russian-doll.xsd"/>
				</slide>
				<slide>
					<title>Russian Doll Structure</title>
					<img style="width : 75% ; margin : 4% ; " src="russian-doll.gif" title="Russian Doll XSDL"/>
				</slide>
			</part>
			<part>
				<title>Venetian Blinds</title>
				<slide>
					<title>Venetian Blinds XSDL</title>
					<listing src="venetian-blinds.xsd"/>
				</slide>
				<slide>
					<title>Venetian Blinds Structure</title>
					<img style="width : 75% ; margin : 4% ; " src="venetian-blinds.gif" title="Venetian Blinds XSDL"/>
				</slide>
			</part>
			<part>
				<title>Salami Slice</title>
				<slide>
					<title>Salami Slice XSDL</title>
					<listing src="salami-slice.xsd"/>
				</slide>
				<slide>
					<title>Salami Slice Structure</title>
					<img style="width : 75% ; margin : 4% ; " src="salami-slice.gif" title="Salami Slice XSDL"/>
				</slide>
			</part>
			<part>
				<title>Garden of Eden</title>
				<slide>
					<title>Garden of Eden XSDL</title>
					<listing src="garden-of-eden.xsd"/>
				</slide>
				<slide>
					<title>Garden of Eden Structure</title>
					<img style="width : 75% ; margin : 4% ; " src="garden-of-eden.gif" title="Garden of Eden XSDL"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Typed XML Structures</title>
				<ul>
					<li>XSDL introduces a <q>type layer</q> to schema languages</li>
					<li>Types facilitate abstractions (and thus modeling)</li>
					<li>Complex types define how elements have to be used</li>
					<li>Designing XSDL can be done in many different ways</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsdl-3">
		<title short="XSDL 3">XSDL – Part III</title>
		<date>2007-10-30</date>
		<toc class="resources"><a href="http://www.awprofessional.com/articles/printerfriendly.asp?p=31477&amp;rl=1">XSDL Identity Constraints</a></toc>
		<toc class="abstract">XSDL allows greater flexibility in defining constraints on intra-document references than the ID/IDREF construct of DTDs. XSDL's <em>Identity Constraints</em> are scoped, typed, and can be used for elements or attributes. They are more powerful than the DTD's limited ID/IDREF mechanism, but still lack sufficient generality to support a really wide set of model constraints to be expressed.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Local and Global Definitions</title>
			<slide>
				<title>Named and Anonymous Types</title>
				<ul>
					<li>Types can be named or anonymous</li>
					<ul>
						<li>named types can be reused (for elements, attributes, or type derivation)</li>
						<li>anonymous types can only be used where they are defined</li>
					</ul>
					<li>DTD <q>types</q> are always anonymous (they cannot be reused)</li>
				</ul>
				<pre>&lt;!ELEMENT person <span style="color : red ; ">(name, address) >
&lt;!ATTLIST person id ID #REQUIRED ></span></pre>
				<ul>
					<li>DTDs have everything hardcoded</li>
					<ul>
						<li>complex types are always locally defined</li>
						<li>elements are always globally defined</li>
						<li>attributes are always locally defined</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Elements</title>
				<slide>
					<title>Local vs. Global Elements</title>
					<ul>
						<li>Elements can be defined in a type or in the schema</li>
						<ul>
							<li>local elements can only be used where they are defined</li>
							<li>global elements can be reused, they can serve as building blocks</li>
						</ul>
						<li>Elements and complex types depend on each other</li>
						<ul>
							<li>an element is defined by a type, often this will be a complex type</li>
							<li>a complex type is defined by its contents, which are elements and/or attributes</li>
						</ul>
					</ul>
					<listing src="global-local.xsd" line="4-12"/>
				</slide>
				<slide>
					<title>Reusable Elements</title>
					<listing src="complex-example.xsd"/>
				</slide>
			</part>
			<part>
				<title>Attributes</title>
				<slide>
					<title>Attribute Definitions</title>
					<ul>
						<li>DTDs treat attributes as something entirely different from element content</li>
						<ul>
							<li>they are defined in an <code>ATTLIST</code>, not in the <code>ELEMENT</code> definition</li>
							<pre>&lt;!ELEMENT person (name, address) >
<span style="color : red ; ">&lt;!ATTLIST person id ID #REQUIRED ></span></pre>
							<li>they have a special range of <link href="dtd-attr-type"/> as opposed to elements</li>
							<pre>&lt;!ATTLIST person id <span style="color : red ; ">ID</span> #REQUIRED ></pre>
						</ul>
						<li>XSDL overcomes these restrictions only partially</li>
						<ul>
							<li><link href="xsdl-simple-types"/> are used to define attribute (or element) contents</li>
							<li>attributes are still described as something entirely different from an element's content model</li>
						</ul>
						<li>Attributes could be better integrated into the model</li>
						<ul>
							<li><link href="relax-ng"/> treats attributes as part of an element's content model</li>
							<li>this makes it trivial to have choices of element content and attributes</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Reusing Attributes</title>
					<ul>
						<li>DTDs treat attributes as something local to an element</li>
						<ul>
							<li>attributes are defined in an element's <code>ATTLIST</code></li>
							<li>reusing attributes for more than one element requires <link href="param-entity"/></li>
						</ul>
						<li>XSDL better supports reuse of schema components</li>
						<ul>
							<li>types can be defined locally (anonymous) or globally (named)</li>
							<li>elements and attributes can be defined globally or locally</li>
						</ul>
						<li>Globally defined attributes can be reused</li>
						<ul>
							<li>the attribute definition does not tie it to any occurrence</li>
							<li>the attribute can then be referenced from a complex type definition</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Reusing Attributes (Example)</title>
					<listing src="global-local.xsd"/>
				</slide>
			</part>
		</part>
		<part id="xsdl-names">
			<title>Names and Namespaces</title>
			<slide>
				<title>Definitions</title>
				<ul>
					<li>Many XSDLs define a vocabulary for a namespace</li>
					<ul>
						<li>DTDs do not have any support for namespaces</li>
						<li>XSDL heavily builds on <link href="xmlns"/></li>
					</ul>
					<li>XSDL provides support for declaring a vocabulary's namespace</li>
					<pre>&lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" <span style="color : red ; ">targetNamespace="http://www.example.com/"</span>></pre>
					<li>Schema-validation can check for proper namespace usage</li>
					<ul>
						<li>the <xml>targetNamespace</xml> has to be used in the instance</li>
						<li>if the namespace does not match, validation cannot succeed</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Instances</title>
				<ul>
					<li>The schema defines the <xml>targetNamespace</xml> of the vocabulary</li>
					<ul>
						<li>all globally defined elements, attributes, and types are in that namespace</li>
						<li>the instances must declare and use the namespace to be schema-valid</li>
					</ul>
				</ul>
				<listing src="multicol.html" line="2-5"/>
				<ul>
					<li>A <em>prefixed name</em> is not the same as a <em>qualified name</em></li>
					<ul>
						<li>if there is a default namespace, unprefixed elements are still qualified</li>
					</ul>
					<li>Nasty details about XML Namespaces and attributes</li>
					<ul>
						<li>the default namespace does <em>not</em> apply to attributes</li>
						<li>attributes must therefore <em>always be prefixed</em> if they need to be qualified</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Name Qualification</title>
				<ul>
					<li>Global elements and attributes have to be used as qualified names</li>
					<ul>
						<li>this means that they must be referred to by their namespace-qualified name</li>
						<li>if a default namespace is used, elements are qualified <em>without carrying a prefix</em></li>
						<li>since the default namespace does not apply to attributes, they always must be explicitly prefixed</li>
					</ul>
					<li>Local elements and attributes may be used qualified or unqualified</li>
					<ul>
						<li>this control <em>only applies to locally defined elements or attributes</em></li>
						<li>the default defined by XSDL is not a good choice</li>
						<li>because of how XML Namespaces work, a non-default choice is recommended</li>
					</ul>
					<li>XSDL allows control over how local names have to be used</li>
				</ul>
				<pre>&lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://www.example.com/" <span style="color : red ; ">elementFormDefault="qualified" attributeFormDefault="unqualified"</span>></pre>
			</slide>
		</part>
		<part>
			<title>Identity Constraints</title>
			<slide>
				<title>Element = Type + Constraints</title>
				<ul>
					<li>DTDs and XSDL are mainly about specifying grammars</li>
					<ul>
						<li>types describe the allowed values using grammars</li>
						<li>grammar-oriented schemas have some nice properties</li>
					</ul>
					<li>DTD's <link href="ididref"/> allow additional constraints</li>
					<ul>
						<li>apart from the grammar definition, cross-references in the tree are supported</li>
						<li>validation checks the integrity of the cross-references, not only the tree</li>
					</ul>
					<li>DTD's ID/IDREF are a very simple mechanism</li>
					<ul>
						<li>they are always global</li>
						<li>they also define the type of the attribute (XML names)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Improvements over ID/IDREF</title>
				<ul>
					<li>XSDL's <em>Identity Constraints</em> improve DTD's ID/IDREF</li>
					<li>Identity constraints are scoped and apply only to a selected set of nodes</li>
					<ul>
						<li>the constraint applies only to a selected set of nodes (using XPath)</li>
					</ul>
					<li>Identity constraints are evaluated using typed values</li>
					<ul>
						<li><code>ID</code>s must be XML names (no numbers allowed)</li>
						<li><q><code>2</code></q> ≟ <q><code>+00002</code></q> should be evaluated based on the type (string or decimal?)</li>
						<li>XSDL separates the constraint from the type of the selected nodes</li>
					</ul>
					<li>Identity constraints may select elements or attributes</li>
					<ul>
						<li>XPaths are used to select the constrained values, they can select elements or attributes</li>
					</ul>
					<li>Multiple fields</li>
					<ul>
						<li>it is possible to select more than one field for a constraint (phone &amp; area code must be unique)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Types of Identity Constraints</title>
				<ul>
					<li>Uniqueness constraints</li>
					<ul>
						<li>if there is a field, it must have a unique value among the selected nodes</li>
					</ul>
					<li>Key constraints</li>
					<ul>
						<li>there must be a field, and it must have a unique value among the selected nodes</li>
					</ul>
					<li>Key reference constraints</li>
					<ul>
						<li>the field must refer to an existing value in the referred key</li>
						<li>if the key reference also is constrained by a key, only one reference may use the referred key</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Identity Constraint Definitions</title>
				<ul>
					<li>Identity constraints are part of an element definition</li>
					<li>There are <em>three important factors</em> to an identity constraint</li>
					<ol>
						<li>location of the identity constraint's definition</li>
						<li>the nodes to which the constraint should be applied</li>
						<li>the fields which are used to evaluate the constraint</li>
					</ol>
					<li>If the constraint is a key reference constraint, there is a <em>fourth factor</em></li>
					<ol start="4">
						<li>the key constraint that is used for checking the references</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Identity Constraint Evaluation</title>
				<img style="width : 90% ; margin : 4% ; " src="identity-constraints.png"/>
			</slide>
			<slide>
				<title>Advanced Identity Constraints</title>
				<img style="width : 90% ; margin : 4% ; " src="identity-constraints++.png"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSDL Features</title>
				<ul>
					<li>Identity constraints enable non-grammar constraints to be expressed</li>
					<li>XSDL identity constraints are more powerful than the DTD ID/IDREF concept</li>
					<li>Only a subset of structural constraints can be expressed in XSDL</li>
					<li>XSDL 1.1 introduces the concept of <em>assertions</em> (XPath-based constraints)</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsdl-4">
		<title short="XSDL 4">XSDL – Part IV</title>
		<date>2007-11-01</date>
		<toc class="resources"></toc>
		<toc class="abstract">XSDL complex types can be derived by <em>restriction</em> or <em>extension</em>. Complex type restriction defines the restricted type to be a more restricted version of the base type. Complex type extension makes it possible to extend the base type by either adding attributes or contents (only by appending new content to the content model). Complex type derivation allows XSDL to express type hierarchies of complex types, which can be aligned with more or less specialized code for processing instances of these types.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Types in XSDL</title>
			<ul>
				<li><link href="xsdl-simple-types"/> are used to model non-markup values</li>
				<li><link href="xsdl-simple-type-restriction"/> is used to define simple user types</li>
				<li><link href="xsdl-complex-types"/> are used to model markup structures</li>
				<li>Models often use generalization and specialization</li>
				<ul>
					<li>not all models use this modeling technique</li>
					<li>OO methods have made this very popular (UML is OO)</li>
				</ul>
				<li><link href="xsdl-complex-type-derivation"/> is used to represent model hierarchies</li>
				<ul>
					<li>in theory, this could be used by generalized and specialized code</li>
					<li>in practice, the connection between XSDL and code is too weak to be robust</li>
				</ul>
			</ul>
		</slide>
		<part id="xsdl-complex-type-derivation">
			<title>Complex Type Derivation</title>
			<slide>
				<title>Type Derivation</title>
				<ul>
					<li>XSDL supports the modeling approach of <em>specialization</em></li>
					<ul>
						<li>simple types can be restricted to create more specialized simple types</li>
						<li>each value of a restricted type is also a valid value of the more general type</li>
					</ul>
					<li>Complex types are combinations of content and attributes</li>
					<li>Specialization of complex types can be done in two ways</li>
					<ul>
						<li><link href="xsdl-complex-type-restriction"/>: more restricted ways of using the content and/or attributes</li>
						<li><link href="xsdl-complex-type-extension"/>: additional content and/or attributes may be used</li>
					</ul>
					<li>Both kinds of complex type derivation can be regarded as specialization</li>
					<ul>
						<li><link href="xsdl-complex-type-restriction"/>: for US persons the country must always be set to <q><code>US</code></q></li>
						<li><link href="xsdl-complex-type-extension"/>: people having an employee number are employees</li>
					</ul>
				</ul>
			</slide>
			<part id="xsdl-complex-type-restriction">
				<title>Complex Type Restriction</title>
				<slide>
					<title>Removing Choices</title>
					<ul>
						<li>Complex types usually allow variability</li>
						<ul>
							<li><xml>minOccurs</xml> and <xml>maxOccurs</xml> allow variability in occurrences</li>
							<li><xml>choice</xml> groups allow choosing between a number of alternatives</li>
							<li>attributes may be flagged as <code>use="optional"</code></li>
							<li>simple types allow the individual values to use certain sets of values</li>
						</ul>
						<li>Complex type restriction allows restrictions of all these variations</li>
						<ul>
							<li><xml>minOccurs</xml> and <xml>maxOccurs</xml> can be made more restrictive</li>
							<li>alternatives can be removed from choice groups</li>
							<li>optional attributes can be flagged as <code>use="required"</code> or <code>use="prohibited"</code></li>
							<li>the simple types of values can be set to more restricted simple types</li>
						</ul>
						<li>The technical way of defining restrictions is cumbersome</li>
						<ul>
							<li>when the base type changes, the restricted type has to be fixed by hand</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Complex Type Restriction (Example)</title>
					<listing src="complex-restriction.xsd"/>
				</slide>
				<slide>
					<title>Processing Restricted Complex Types</title>
					<ul>
						<li>Values of restricted types are values of the base types</li>
						<ul>
							<li>type restriction is defined so that restricted type values are always base type values</li>
							<li>code processing a type can be reused to process restricted types</li>
						</ul>
						<li>If there is a well-designed type hierarchy, programming becomes easier</li>
						<ul>
							<li>simple code can be written to handle the basic types</li>
							<li>if required, more advanced code can be written for the restricted types</li>
							<li>in many cases, restriction is more for validation than for processing</li>
						</ul>
						<li>XSDLs may even use <em>abstract types</em></li>
						<ul>
							<li>no element will ever use the <xml>addressType</xml></li>
							<li>concrete elements will only use restricted types</li>
							<li>there can be code handling the <xml>addressType</xml> which handles all addresses</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xsdl-complex-type-extension">
				<title>Complex Type Extension</title>
				<slide>
					<title>Adding Content</title>
					<ul>
						<li>Complex types are element content and attributes</li>
						<ul>
							<li>extensions can add content, but only at the end of the base content</li>
							<li>extensions can add attributes (order is not significant for attributes)</li>
						</ul>
						<li>Adding content to existing content may not change the existing content</li>
						<ul>
							<li>if the content is <em>element only</em>, it has to remain element only</li>
							<li>if the content is <em>mixed</em>, it has to remain mixed</li>
							<li>if the content is <em>empty</em>, it may become element only or mixed</li>
							<li>the reason for these rules is that <em>mixed</em> is a global property of a type</li>
						</ul>
						<li>Adding attributes simply adds these to the list of existing attributes</li>
						<ul>
							<li>the added attributes may be optional or required</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Complex Type Extension (Example)</title>
					<listing src="complex-extension.xsd"/>
				</slide>
				<slide>
					<title>Processing Extended Complex Types</title>
					<ul>
						<li>Values of extended types are <em>not</em> values of the base types</li>
						<ul>
							<li>type extension adds content and/or attributes to a type</li>
							<li>if content is added, it is always added at the end of the base type's content</li>
						</ul>
						<li>If there is a well-designed type hierarchy, programming becomes easier</li>
						<ul>
							<li>simple code can be written to handle the basic types</li>
							<li>if that should handle extended types, it must be written to handle extensions</li>
							<li><q>handling extensions</q> can be as simple as skipping them</li>
						</ul>
						<li>XSDLs may even use <em>abstract types</em></li>
						<ul>
							<li>no element will ever use the <xml>addressType</xml></li>
							<li>concrete elements will only use extended types</li>
							<li>code handling extended types can build on code handling the base type</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSDL Features</title>
				<ul>
					<li>XSDL allows defining a grammar for XML documents</li>
					<li>Types make it easier to turn a model into a grammar</li>
					<li>Some of the things we have not seen:</li>
					<ul>
						<li>named groups, modularizing schemas, wildcards, substitution groups, …</li>
					</ul>
				</ul>
			</slide>
			<slide id="schema-components">
				<title>Schema Components</title>
				<img style="height : 75% ; margin : 2% ; " src="schema-components.gif" title="Schema Components"/>
			</slide>
		</part>
	</presentation>
	<presentation id="xmlmodeling">
		<title short="Modeling">From Model to Markup</title>
		<date>2007-11-06</date>
		<toc class="resources"/>
		<toc class="abstract">While XML is very useful for representing and manipulating structured data, the question remains where these structures come from. They are usually some kind of encoding for a conceptual model, but there is no established and universally accepted way of how to connect the modeling world with XML markup. Some of the challenges and approaches to XML and modeling will be presented in this lecture. The goal of this lecture is to raise awareness for the current gap between models and markup, and for practical approaches how to bridge that gap.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Motivation</title>
			<slide>
				<title>Writing schemas is hard &amp; tedious</title>
				<ul>
					<li>Schema languages can be hard to deal with because</li>
					<ul>
						<li>they are limited (DTD)</li>
						<li>they are complex (XSDL)</li>
					</ul>
					<li>Schemas are not a good way to model data</li>
					<li>… for practical reasons:</li>
					<ul>
						<li>schemas can be confusing to look at (XSDL)</li>
						<li>schemas are not intelligible to non-developers</li>
					</ul>
					<li>… for technical reasons:</li>
					<ul>
						<li>schemas are <em>representation-</em> and <em>technology-</em>specific (they only describe XML)</li>
						<li>XML is a tree-centric format, and so are its schema languages</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Writing 5cHEMa$ is cool &amp; g33ky!</title>
				<ul>
					<li>This obviously is an engineer's view</li>
					<li>Writing schemas should not be a goal, but a means to an end</li>
					<li>Data modeling should be done by data modelers</li>
					<li>A more conceptual view of the structures represented by the schemas would …</li>
					<ul>
						<li>… ease understanding for non-developers</li>
						<li>… enable focusing more on semantics</li>
						<li>… provide a more goal-oriented approach</li>
						<li>… allow for model integration</li>
						<li>… lead to more platform independence</li>
					</ul>
				</ul>
			</slide>
			<slide id="model-layers">
				<title>We Need a Conceptual Modeling Layer</title>
				<ul>
					<li>We need to be able to do modeling on a more abstract level</li>
					<li>In the database world, this level is called the <q>conceptual modeling layer</q></li>
					<table rules="all" class="stackTab">
						<tr>
							<th rowspan="2">Layer</th>
							<th colspan="2">Technology</th>
						</tr>
						<tr>
							<th>database world (SQL)</th>
							<th>XML related</th>
						</tr>
						<tr>
							<td>conceptual</td>
							<td>Entity Relationship Diagrams</td>
							<td><link href="model-to-markup">???</link></td>
						</tr>
						<tr>
							<td>logical</td>
							<td>DDL (<code>CREATE TABLE …</code>)<br/>DML (<code>INSERT INTO …</code>)</td>
							<td>schemas<br/>XQuery</td>
						</tr>
						<tr>
							<td>physical</td>
							<td>table space</td>
							<td>XML</td>
						</tr>
					</table>
					<li>Modeling Layers? Layering Models?</li>
				</ul>
			</slide>
		</part>
		<part>
			<title> Modeling Layers – Layering Models?</title>
			<part id="model">
				<title>Modeling</title>
				<slide id="model-definition">
					<title>What is a Model?</title>
					<ul>
						<li><q>A simplification</q></li>
						<ul>
							<li>only consider some relevant / interesting traits, neglecting details / unneeded properties</li>
							<li>the architectural model of the Parthenon in the <a href="http://www.thebritishmuseum.ac.uk/gr/debate.html">British Museum</a></li>
						</ul>
						<li><q>An abstraction</q></li>
						<ul>
							<li>a generalization / concept / idealization</li>
							<li>determine / distinguish common / defining / characteristic attributes</li>
							<li>platonic ideas</li>
						</ul>
						<li><q>A template</q></li>
						<ul>
							<li>a mold / blueprint / reference example</li>
							<li>prescribe relevant / defining attributes</li>
						</ul>
						<li>The former has no physical embodiment, whereas the latter have</li>
						<li>Usually there is a one-to-many relationship between <em>models</em> and <em>instances</em></li>
					</ul>
					<note>The cardinality of the relationship model-instance can be many-to-one! Think of a toy vessel – e.g., a <em>Titanic</em>! But wait … which one's now the <em>model</em>?</note>
				</slide>
				<slide id="model-fun">
					<title>What is a Model? (Natural Language)</title>
					<img src="http://www.markrobertwahlberg.com/mwck.jpg" style="height: 70%;" class="floatRight" />
					<ul>
						<li>Compare the use of <q>model</q> in (more) natural language:</li>
						<ul>
							<li><a href="http://sketchup.google.com/3dwarehouse/details?mid=e86d2c2bac08770486ed0f2c6244a512">Sather tower</a> is modeled after <a href="http://en.wikipedia.org/wiki/St_Mark%27s_Campanile">San Marco's <em>campanile</em></a></li>
							<li>The Ford <a href="http://www.modelt.org/">Model 'T'</a></li>
							<li>A fashion model <a href="http://www.markrobertwahlberg.com/mwck.jpg">→</a><sup>1</sup></li>
							<li><q><a href="http://www.brickshelf.com/gallery/stwr90/bridges/image0007.jpg.jpg">This</a> model is a model of the Golden Gate Bridge and it required about 300 pieces …</q><sup>2</sup></li>
							<li>Architectural models:<sup>3</sup></li>
							<ul class="dialog">
								<li><b>Derek Zoolander:</b> <q>What is this? A center for ants? How can we be expected to teach children to learn how to read … if they can't even fit inside the building?</q></li>
								<li><b>Mugatu:</b> <q>Derek, this is just a small …</q></li>
								<li><b>Derek Zoolander:</b> <q>I don't wanna hear your excuses! The building has to be at least … three times bigger than this!</q></li>
							</ul>
						</ul>
					</ul>
					<note>
						<ol>
							<li>From <a href="http://www.markrobertwahlberg.com/">http://www.markrobertwahlberg.com/</a></li>
							<li>From <a href="http://www.mocpages.com/moc.php/6128">http://www.mocpages.com/</a></li>
							<li>From <a href="http://www.imdb.com/title/tt0196229/quotes">http://www.imdb.com/</a></li>
						</ol>
					</note>
				</slide>
				<slide id="model-modeling">
					<title>Modeling</title>
					<ul>
						<li>The process of identifying those <q>relevant</q> attributes and omitting the rest</li>
						<li>The formulation (or translation) thereof in a <em>way of description</em> commonly used or even standardized (mapping to a <em>meta-model</em>)</li>
						<li>This involves design decisions and trade-offs to be made</li> 
						<ul>
							<li>choosing the right granularity</li>
							<li>flexibility vs. stringency</li>
						</ul>
						<li>Modeling therefore always is …</li>
						<ul>
							<li>… connected to a certain <em>perspective</em></li>
							<li>… limited to a certain <em>scope</em></li>
							<li>… having a main <em>focus</em></li>
						</ul>
					</ul>
					<note>In a certain field / realm / <em>universe of discourse</em> there usually is some agreement on how modeling has to be done. This is an essential prerequisite for models to be used as a subject of discussion / negotiation / evaluation. This agreement can have been achieved implicitly or by standardization. In the <link href="model-fun">example</link> above, Derek Zoolander does not know the conventions implicitly being agreed on when dealing with architectural models.</note>
				</slide>
				<slide id="model-reasons">
					<title>Why modeling?</title>
					<ul>
						<li>Get a bigger picture:</li>
						<ul>
							<li>focus on relevant features</li>
							<li>deal with data's meaning instead of representation</li>
							<li>facilitate interaction / integration</li>
						</ul>
						<li>Description: allows for</li>
						<ul>
							<li>analysis &amp; improvement</li>
							<li>documentation</li>
							<li>verification</li>
						</ul>
						<li>Prescription: allows for</li>
						<ul>
							<li><a href="http://www.henrywagner.org/pictures/ff/Thanksgiving2005/IMG_1752.php">simulation</a></li>
							<li>prognosis</li>
							<li>making assumptions (e.g. when creating software processing the XML described by a schema)</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="layer">
				<title>Layering</title>
				<slide id="layer-cs">
					<title>Layering in Computer Science</title>
					<ol>
						<li>Encapsulating / hiding details / internals</li>
						<li>Enabling working on a simpler / more goal-oriented level</li>
						<li>Reusing frequent structures / procedures (<q>patterns</q><sup>1</sup>)</li>
						<li>Gaining independence from specific technologies / media</li>
					</ol>
					<note>
						<ol>
							<li><q>Patterns are models that are sufficiently general, adaptable, and worthy of imitation that we can reuse them.</q> (From: Glushko, Robert J. and McGrath, Tim: <em>Document Engineering</em>, p. 90). Identifying such patterns is a modeling task!</li>
						</ol>
					</note>
				</slide>
				<slide id="layer-communication">
					<title>Layering in Computer Science: Protocol stacks</title>
					<ol class="floatRight">
						<li>Encapsulation</li>
						<li>Goal-orientation</li>
						<li>Pattern reuse</li>
						<li>Independence</li>
					</ol>
					<ul>
						<li>The TCP/IP protocol stack</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>Google; a wiki; your blogging tool</td>
								<td rowspan="2"><em>Application</em></td>
								<td>(1)</td>
							</tr>
							<tr>
								<td>HTTP, FTP</td>
								<td>(3)</td>
							</tr>
							<tr>
								<td>TCP, UDP</td>
								<td><em>Transport</em></td>
								<td>(2)</td>
							</tr>
							<tr>
								<td>IP</td>
								<td><em>Network</em></td>
								<td>(4)</td>
							</tr>
							<tr>
								<td>Ethernet, 802.11</td>
								<td><em>MAC (Medium Access Control)</em></td>
								<td></td>
							</tr>
							<tr>
								<td>NRZ, DSSS, 16QAM</td>
								<td rowspan="2"><em>Physical</em></td>
								<td></td>
							</tr>
							<tr>
								<td>Copper wire, fiber, RF</td>
								<td></td>
							</tr>
						</table>
						<li>Metaphor: Andrew S. Tanenbaum's interpreter</li>
					</ul>
				</slide>
				<slide id="layer-compiler">
					<title>Layering in Computer Science: Compilers</title>
					<ol class="floatRight">
						<li>Encapsulation</li>
						<li>Goal-orientation</li>
						<li>Pattern reuse</li>
						<li>Independence</li>
					</ol>
					<ul>
						<li>Compilers, Virtual Machines, VHDL</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>UML</td>
								<td>(2)</td>
							</tr>
							<tr>
								<td><span style="font-variant: small-caps">Java</span></td>
								<td>(4)</td>
							</tr>
							<tr>
								<td>Virtual machine</td>
								<td></td>
							</tr>
							<tr>
								<td>C</td>
								<td>(1)</td>
							</tr>
							<tr>
								<td>Assembler Language</td>
								<td></td>
							</tr>
							<tr>
								<td>Machine Code</td>
								<td></td>
							</tr>
							<tr>
								<td>VHDL</td>
								<td></td>
							</tr>
							<tr>
								<td>Logic Gates</td>
								<td>(3)</td>
							</tr>
						</table>
						<li>Metaphor: Big enterprise with strong vertical division of work</li>
					</ul>
				</slide>
				<slide id="layer-physiology">
					<title>Layering in Human Physiology</title>
					<ul>
						<li>Perception:</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>My Bicycle</td>
								<td>Memory, social implications …</td>
							</tr>
							<tr>
								<td>A Bicycle</td>
								<td>Prior interpretative knowledge</td>
							</tr>
							<tr>
								<td>Circles, lines and a diamond shape, colored</td>
								<td>Form vision</td>
							</tr>
							<tr>
								<td>Electrical pulses</td>
								<td>Neural transmission</td>
							</tr>
							<tr>
								<td>A concentration of Rhodopsin, Opsin, cGMP, …</td>
								<td>Retina: Rods &amp; Cones</td>
							</tr>
							<tr>
								<td>A bunch of sunrays, reflected</td>
								<td>Pupil, eye lens</td>
							</tr>
						</table>
						<li>Motion:</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>Shake hands</td>
								<td>Conscious action</td>
							</tr>
							<tr>
								<td colspan="2" align="center">… all the way down to …</td>
							</tr>
							<tr>
								<td>Converting some ATP</td>
								<td>Mitochondria</td>
							</tr>
						</table>
					</ul>
				</slide>
			</part>
			<slide id="model-layer">
				<title>The Combination: Model Layers!</title>
				<ul>
					<li>Modeling layers: Models of different level of abstraction and / or granularity, stacked onto each other as layers</li>
					<li>There are different classifications of such layers of modeling</li>
					<li>There is a diligent classification used in the context of data modeling: P.P.S. Chen's <q>Multiple Views of Data</q>:<sup>1</sup></li>
					<ol>
						<li>information concerning entities and relationships in our minds</li>
						<li>information structure – organization in which entities and relationships are represented by data.</li>
						<li>access-path independent data structure – the data structures which are not involved with search schemes, indexing schemes, etc.</li>
						<li>access-path dependent data structure</li> 
					</ol>
					<li>Most often the coarser classification <link href="model-layers">instance / logical / conceptual</link> layer is used</li>
				</ul>
				<note>
					<ol>
						<li>Chen, Peter Pin-Shan: <em>The Entity-Relationship Model – Towards a Unified View of Data</em>, Cambridge MA, 1976.</li>
					</ol> 
				</note>
			</slide>
		</part>
		<part id="data-modeling">
			<title>Data Modeling</title>
			<slide id="data-modeler">
				<title>Who's a Data Modeler?</title>
				<ul>
					<li><em>You are Data Modelers!</em></li>
					<li>You did (implicitly, and perhaps unconsciously) Data Modeling while creating the résumé DTD</li>
					<li>Most likely you modeled your data already when creating the XML instance</li>
					<li>In either case you most likely were thinking in some semantic structures on a more <em>conceptual</em> level than the final schema or instance resides</li>
					<li>Did anybody …</li>
					<ul>
						<li>… draw some trees?</li>
						<li>… sketch some boxes?</li>
						<li>… write down some lists?</li>
					</ul>
				</ul>
			</slide>
			<slide id="quality-criteria">
				<title>Quality Criteria</title>
				<ul>
					<li>See the quality criteria from <link href="bestpractices">The Good, the Bad, and the Ugly</link></li>
					<ul>
						<li>avoid <link href="redundant-data">redundancy</link>, especially within <link href="schema-redundancy">schemas</link></li>
						<li>enforce <link href="reuse">reuse</link></li>
						<li>be <link href="element-vs-attribute">consistent</link></li>
						<li>choose a reasonable level of <link href="granularity">granularity</link></li>
						<li>think in terms of the logical structure (e.g., the <link href="infoset">Infoset</link>) rather than in terms of the physical representation (e.g., the XML as it is stored in a file)</li>
					</ul>
					<li>What is <q>good</q> data modeling?</li>
					<li>→ most of the above criteria are applicable to data modeling as well</li>
				</ul>
			</slide>
			<part>
				<title>An example case: Harry again</title>
				<slide id="harry-returns">
					<title>Harry returns</title>
					<ul>
						<li>Make up a data model for general résumés</li>
						<ol>
							<li>capture <em>logical structure</em> rather than <em>representation</em> of the data</li>
							<li>do not include information encoded in the <em>document's structure</em> into the <em>instance's data</em> (rely on that certain amount of self-description of XML)</li>
						</ol>
						<li>In <link href="xml-views">The Good, the Bad, and the Ugly</link>, we have been told:
							<q>think about working with a tree rather than working with a text file</q> – so let's draw a tree!
						</li>
					</ul>
					<note>
						<ol>
							<li>The fact that the person's name and contact information are usually given in a <em>head</em> section of a résumé document does not necessarily mean that such a head section is a relevant structural element: It's just a representational convention, and therefore should not be part of the data model. When creating a <em>view</em> of the data, we can utilize our knowledge of appropriate representational conventions by rendering personal information in a head section.</li>
							<li>If our vocabulary contains dedicated elements for <elem>education</elem> and <elem>experience</elem> it is not necessary to include attributes or elements specifying section titles like <q>education</q> or <q>experience</q>: This information can be retrieved from the structure and again added to a specific view when being generated.</li>
						</ol>
					</note>
				</slide>
				<slide id="harry-tree">
					<title>Retrieving the résumé's structure</title>
					<div class="panel">
						<object data="img/res01c.svg" type="image/svg+xml" width="900" height="450">
							<param name="src" value="img/res01c.svg" />
							<p>You need to have an SVG viewer installed in order to view this graphic!</p>
						</object>
					</div>
					<ul>
						<li>We omit <elem>phone</elem> and <elem>email</elem> for the sake of simplicity</li>
					</ul>
				</slide>
				<slide id="good-dtd-1">
					<title>A well-designed DTD (1)</title>
					<listing src="resume-model-01.dtd" line="1-9" />
					<ul>
						<li>The order constraint in <elem>résumé</elem> probably is introduced by the schema language's limitations</li>
						<ul>
							<li>SGML: <q><code>&amp;</code></q> connector (not part of XML)</li>
							<li>XSDL: <xsde>all</xsde> model group</li>
						</ul>
						<li><elem>date</elem> has been made more flexible (by denoting <elem>day</elem> to be optional) in order to be useable more generally</li>
						<li><code>proficiency</code> readily can be modeled as an attribute; its value space is a good example for an enumeration</li>
					</ul>
				</slide>
				<slide id="good-dtd-2">
					<title>A well-designed DTD (2)</title>
					<listing src="resume-model-01.dtd" line="12-16" />
					<ul>
						<li><elem>address</elem> and <elem>name</elem> are good examples for reusable elements</li>
						<li><elem>name</elem> is a semantically sensible container: <elem>first</elem> etc are <em>parts-of</em> it</li>
						<li>the nesting <elem>startDate</elem> (or <elem>endDate</elem>, respectively) » <elem>date</elem> semantically is a less expressive relation; it merely is inserted in order to</li>
						<ul>
							<li>reuse <elem>date</elem></li>
							<li>insert two <elem>date</elem> elements,</li>
							<li>giving the two some reasonable (self-descriptive) names</li>
						</ul>
						<li>this clearly would be a good case for using named types in XSDL</li>
					</ul>
				</slide>
 				<slide id="good-instance">
					<title>A good instance from a well-designed DTD</title>
					<listing src="resume-felix-01.xml" line="10-13" />
					<listing src="resume-felix-01.xml" line="24-27" />
					<listing src="resume-felix-01.xml" line="96-100" />
					<listing src="resume-felix-01.xml" line="132-134" />
				</slide>
				<slide>
					<title>A look at the <em>essay</em> section</title>
					<listing src="resume-felix-01.xml" line="121-130" />
					<ul>
						<li>There's still redundant data!</li>
						<li>Plain, unstructured text; hard to be interpreted for machines</li>
						<li>Use NLP … – or improve your data model!</li>
					</ul>
				</slide>
				<slide id="good-dtd-review">
					<title>Critical Review: A well-designed DTD?</title>
					<ul>
						<li>The DTD is not that well-designed:</li>
						<ol>
							<li>regarding the markup: container elements around <elem>education</elem> and <elem>experience</elem> items would be nice</li>
							<li>from a data perspective: resolve redundancies</li>
							<li>conceptually: allow for representation of these semantic relations</li>
						</ol>
						<li>To achieve this, we need:</li>
						<ul>
							<li>A better modeling formalism</li>
							<li>More precise quality criteria</li>
						</ul>
						<li>In the world of relational databases, both of them exist</li>
					</ul>
				</slide>
			</part>
			<part id="relational-modeling">
				<title>Excursus: Data Modeling in the World of Relational Databases</title>
				<slide id="relational-nf">
					<title>Quality Criteria: Normal Forms</title>
					<ul>
						<li>There are well-defined quality criteria of increasing strictness, called <a href="http://dret.net/glossary/normalform">Normal Forms</a><sup>1</sup></li>
						<li>An informal example:</li>
						<table rules="all" class="tab">
							<tr>
								<th class="tabHi">ID</th>
								<th>Name</th>
								<th>Study</th>
								<th>Department</th>
							</tr>
							<tr>
								<td class="tabHi">24536133</td>
								<td>Bob</td>
								<td>Computer Science</td>
								<td>College of Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">34125004</td>
								<td>Alice</td>
								<td>Document Engineering</td>
								<td>School of Information</td>
							</tr>
							<tr>
								<td class="tabHi">11042019</td>
								<td>Zlatan</td>
								<td>Computer Science</td>
								<td>College of Engineering</td>
							</tr>
						</table>
						<li>Must be resolved to:</li>
						<table rules="all" class="tab" style="float: right;">
							<tr>
								<th class="tabHi">Study</th>
								<th>Department</th>
							</tr>
							<tr>
								<td class="tabHi">Computer Science</td>
								<td>College of Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">Document Engineering</td>
								<td>School of Information</td>
							</tr>
						</table>
						<table rules="all" class="tab">
							<tr>
								<th class="tabHi">ID</th>
								<th>Name</th>
								<th>Study</th>
							</tr>
							<tr>
								<td class="tabHi">24536133</td>
								<td>Bob</td>
								<td>Computer Science</td>
							</tr>
							<tr>
								<td class="tabHi">34125004</td>
								<td>Alice</td>
								<td>Document Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">11042019</td>
								<td>Zlatan</td>
								<td>Computer Science</td>
							</tr>
						</table>
					</ul>
					<note>
						<ol>
							<li>Yet, the most strict normal forms (4NF, 5NF) are hardly ever used in practice for the reasons mentioned <link href="redundant-data">earlier</link></li>
						</ol>
					</note>
				</slide>
				<slide id="relational-er">
					<title>Conceptual Modeling Formalism: Entity-Relationship Diagrams</title>
					<ul>
						<li>There is a well-established (graphical!) formalism for conceptual modeling</li>
						<li>Using the formalism in the correct way leads to data models complying with the quality criteria</li>
						<li>An example:</li>
					</ul>
					<div class="panel">
						<img src="ER.png" />
					</div>
					<ul>
						<li>Today, there exist several ER-dialects; <a href="http://www.uml.org/">UML</a> is a superset</li>
					</ul>
				</slide>
			</part>
			<part id="conceptual-modeling">
				<title>Conceptual Modeling for XML Data</title>
				<slide id="conceptual-modeling-today">
					<title>Is there anything similar for XML?</title>
					<ul>
						<li>There is no established formalism; nor is there any formalism as suitable for XML as ER is for relational databases</li>
						<li>There are scientific proposals: Several extensions of ER (XER, ERX, EER), formal grammars (XGrammar)</li>
						<ul>
							<li>some of them have limited scope, some are impractical for real-world deployment, some of them have no graphical equivalent at all</li>
						</ul>
						<li>Extended/restricted versions of <a href="http://www.uml.org/">UML</a></li>
						<ul>
<!--						
							<li>UML cannot model everything XML could express (e.g. mixed content)</li>
							<li>XML cannot express everything UML can model (e.g. ?)</li>
-->						
							<li>most often, UML is simply used as a drawing tool for schemas: Schemas are a bad way for conceptual modeling, UML is a bad drawing tool</li>
						</ul>
						<li>Textual descriptions can be used</li>
						<ul>
							<li>possibly inaccurate, translation to schemas is error-prone and not formally verifiable</li>
						</ul>
						<li>In strongly data-oriented contexts, spreadsheets can be used (<a href="http://docs.oasis-open.org/ubl/cd-UBL-1.0/">UBL</a>)</li>
						<ul>
							<li>this restricts the structural expressiveness in fundamental ways</li>
						</ul>
					</ul>
				</slide>
				<slide id="conceptual-modeling-probelms">
					<title>Why is it so hard to create a suitable formalism?</title>
					<ul>
						<li>XML data can be much more complex than relational data:</li>
						<ul>
							<li>hierarchical structures (maybe even recursive)</li>
							<li>mixed content</li>
							<li>alternatives (<q><code>|</code></q> connector, <xsde>choice</xsde> model group)</li>
							<li>order constraints</li>
							<li>ID/IDREF constructs</li>
							<li>faceted/enumerated content models</li>
						</ul>
						<li>There are no clear quality criteria: Many things are a question of <em>style</em> or <em>taste</em></li>
						<ul>
							<li>schema languages allow for different paradigms to be used</li>
							<li>functionally equivalent (and semantically similar) results can be achieved by quite different means: e.g. <em>choice model group</em> vs. <em>substitution group</em></li>
							<li>the quality of the final schema or XML may considerably depend on things beyond the scope of a conceptual model</li>
						</ul>
					</ul>
				</slide>
				<slide id="conceptual-modeling-approach">
					<title>An informal formalism</title>
					<ul>
						<li>As there is no established notation, <q><link href="model-to-markup">informal models may use any notation</link></q></li>
						<li>Well then, let's draw some boxes!</li>
						<li>But even so, let's try to do it in a systematic way:</li>
					</ul>
					<table rules="groups" border="1" class="tab">
						<thead>
							<tr>
								<th></th>
								<th>Determine …</th>
								<th>Phase</th>
								<th>Question</th>
								<th>Example</th>
								<th>Action</th>
							</tr>
						</thead>
						<tbody>
							<tr>
								<td>1.</td>
								<td>Entities</td>
								<td>Inventory</td>
								<td rowspan="2">What's there?</td>
								<td><elem>person</elem>, <elem>company</elem></td>
								<td>Sketch boxes</td>
							</tr>
							<tr>
								<td>2.</td>
								<td>Reusable Objects</td>
								<td>Analysis</td>
								<td><elem>address</elem>, <elem>date</elem></td>
								<td>Perhaps include some model libraries (<a href="http://docs.oasis-open.org/ubl/cd-UBL-1.0/#SCHEMAS">UBL</a>)</td>
							</tr>
							<tr>
								<td>3.</td>
								<td>Reusable Tags</td>
								<td>Markup design</td>
								<td>What do we need?</td>
								<td>lists, hyperlinks, headings</td>
								<td>Perhaps include some schemas (<a href="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">XHTML</a>)</td>
							</tr>
							<tr>
								<td>4.</td>
								<td>Relations</td>
								<td>Assembly</td>
								<td>What's the connection?</td>
								<td>has-a, contains, references</td>
								<td>Draw arcs and arrows</td>
							</tr>
						</tbody>
					</table>
				</slide>
				<slide id="conceptual-modeling-example">
					<title>The résumé structure, informally formalized</title>
					<div class="panel">
						<img  style="height : 70% ; margin : 2% ; " src="informal.png" />
					</div>
					<note>
						<ul>
							<li>This clearly is not a tree anymore</li>
							<li>There's no specific schema language indicated</li>
							<li>Still missing are:</li>
							<ul>
								<li>constrained / faceted value spaces</li>
								<li>order constraints</li>
								<li>type inheritance (XSDL)</li>
							</ul>
						</ul>
					</note>
				</slide>
				<slide id="better-dtd">
					<title>An even better DTD</title>
					<listing src="resume-model-02.dtd" line="5-6" />
					<listing src="resume-model-02.dtd" line="42-43" />
					<listing src="resume-model-02.dtd" line="22-23" />
					<listing src="resume-model-02.dtd" line="27-31" />
					<listing src="resume-model-02.dtd" line="47-53" />
				</slide>
				<slide id="better-instance">
					<title>An even better instance</title>
					<listing src="resume-felix-02.xml" line="69-75" />
					<ul>
						<li>The essay section: real mixed-content markup!</li>
					</ul>
					<listing src="resume-felix-02.xml" line="134-142" />
				</slide>
				<slide id="generating-views">
					<title>Generating Views</title>
					<ul>
						<li>Utilize the (non-tree) ID/IDREF-relations in order to retrieve the data where needed:</li>
					</ul>
					<listing src="narrative.xsl" line="183-191" />
					<ul>
						<li>From a well-designed data structure with rich semantic connectivity, multiple <em>views</em> can easily be derived</li>
						<li>As an example, two different views have been generated using two XSLT 1.0 stylesheets</li>
						<table class="tab" rules="all">
							<tr>
								<th>View</th>
								<th>XSLT</th>
								<th>HTML</th>
							</tr>
							<tr>
								<th>Tabular</th>
								<td align="center"><a href="src/tabular.xsl">tabular.xsl</a></td>
								<td align="center"><a href="src/tabular.html">tabular.html</a></td>
							</tr>
							<tr>
								<th>Textual</th>
								<td align="center"><a href="src/narrative.xsl">narrative.xsl</a></td>
								<td align="center"><a href="src/narrative.html">narrative.html</a></td>
							</tr>
						</table>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xmlmx-conclusions">
			<title>Conclusions</title>
			<slide>
				<title>XML and Modeling</title>
				<ul>
					<li>Conceptual modeling is highly desirable</li>
					<li>When to be used in a <link href="layer-communication">communication-type</link> scenario, conceptual modeling inherently has to be relying upon an agreement on established formalisms</li>
					<li>When to be used in a <link href="layer-compiler">compiler-type</link> scenario, conceptual modeling requires the availability of appropriate tools</li>
					<li>There is no formalism really suitable for XML-centric data; there are no sophisticated tools<sup>1</sup></li>
					<li>→ There is a gap to be bridged!</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="schemalanguages">
		<title short="Schema Languages">Alternative Schema Languages – Schematron</title>
		<date>2007-11-08</date>
		<toc class="resources"><a href="http://www.thaiopensource.com/relaxng/design.html">The Design of RELAX NG</a>&#160;· <a href="http://www.schematron.com/" title="Schematron Home Page">Schematron</a></toc>
		<toc class="abstract">XSDL is only one representative from a class of languages which are all designed for the purpose of testing whether some XML document satisfies a set of constraints. This test could of course also be conducted programmatically, but this is not portable and not easily maintainable. Schema languages thus often use a declarative approach to specifying how to conduct validation. A very simple yet very powerful language for this is <em>Schematron</em>, which uses the expressive power of XPath for testing whether a document satisfies a set of conditions. Schematron is <em>rule-based</em> in contrast to the more traditional <em>grammar-based</em> schema languages and complements these very well.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>XML Schema Languages</title>
			<ul>
				<li>XML schema languages define constraints for XML documents</li>
				<ul>
					<li>defining constraints declaratively is better than writing program code</li>
					<li>programming should be deferred as long as possible</li>
				</ul>
				<li>XML schema languages validate XML documents</li>
				<ul>
					<li>DTDs check XML documents against the grammar rules</li>
					<li>XSDLs support additional datatyping for validating contents</li>
				</ul>
				<li>Applications often have many more constraints</li>
				<ul>
					<li>global constraints on the characters used in the document</li>
					<li><em>co-constraints</em> which relate content to content</li>
					<li>comparisons with external data (such as controlled lists)</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Schema-Validation and Applications</title>
			<img src="schema-valid-documents.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<slide>
			<title>Validation Pipelines</title>
			<ul>
				<li>Validation is a modular task with various facets</li>
				<ul>
					<li>modularization is a popular and useful principle in computer science</li>
					<li>XSDL is the attempt to build the <em>one and only schema language</em></li>
					<li>more modular approaches might lead to more flexible validation</li>
				</ul>
				<li>Validation pipelines are useful in various scenarios</li>
				<ul>
					<li>perform validation based on a sequence of basic validation tasks</li>
					<li>make validation more configurable (partial validation)</li>
					<li>make validation more flexible (different validation stages)</li>
				</ul>
				<li>Validation pipelines can be easily implemented</li>
				<ul>
					<li>programming languages support passing DOM trees as parameters</li>
					<li><em>XML pipeline languages</em> can be used to implement pipelines declaratively</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Validation Pipeline Example</title>
			<img src="validation-pipeline.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<part id="relax-ng">
			<title>RELAX NG</title>
			<slide>
				<title>Design by Committee</title>
				<ul>
					<li>XSDL was a political decision</li>
					<ul>
						<li>several schema languages were competing to replace DTDs</li>
						<li>DCD, DDML, SOX, XML Data, and XDR were inputs to XSDL</li>
						<li>XSDL became the first unreadable W3C specification</li>
						<li>implementing XSDL correctly is hard (large number of specialized rules)</li>
					</ul>
					<li>Researchers were looking for a more elegant solution</li>
					<ul>
						<li>the underlying formalism should be well-defined and well-studied</li>
						<li>the schema language should be easy to learn and use</li>
						<li>lessons learned from DTDs should be included</li>
					</ul>
					<li>RELAX NG is the merger of two similar approaches</li>
					<ul>
						<li><em>Tree Regular Expressions (TREX)</em></li>
						<li><em>Regular Language description for XML (RELAX)</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>RELAX NG +/-</title>
				<ul>
					<li>RELAX NG and XSDL are direct competitors</li>
					<li>Advantages of RELAX NG</li>
					<ul>
						<li><link href="relaxng-compact"/></li>
						<li>the document element is well-defined</li>
						<li>SGML's <q><code>&amp;</code></q> is supported (<xsde>all</xsde> is extremely limited)</li>
						<li>non-deterministic content models</li>
					</ul>
					<li>Disadvantages of RELAX NG</li>
					<ul>
						<li>no datatype support (datatype libraries can be included)</li>
						<li>no modeling facilities in the spirit of XSDL's type derivation</li>
						<li>less popular than XSDL</li>
						<li>no support for XSDL's numeric occurrence constraints (<xsd>minOccurs</xsd>/<xsd>maxOccurs</xsd>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>RELAX NG Syntaxes</title>
				<img src="xml-technology-syntaxes.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
			<part>
				<title>Principles</title>
				<slide>
					<title>Validation</title>
					<ul>
						<li>Validation should not change the document</li>
						<ul>
							<li>there are no default values</li>
						</ul>
						<li>Only schema↔instance tests are supported</li>
						<ul>
							<li>there is no type hierarchy as in XSDL (schema↔schema)</li>
							<li>there are no identity constraints (instance↔instance)</li>
						</ul>
						<li>Grammars should not be restricted</li>
						<ul>
							<li>DTDs and XSDL do not allow non-determinism</li>
							<li>RELAX NG allows non-deterministic content models</li>
							<pre>chess = white, (black, white)*, black?</pre>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Grammars</title>
					<ul>
						<li>RELAX NG grammars have a start symbol</li>
						<ul>
							<li>DTDs and XSDL do not have start symbols</li>
						</ul>
						<li>Attributes are defined as part of the <q>content model</q></li>
						<ul>
							<li>a more homogeneous view of the XML document tree</li>
							<li>this allows alternatives of elements and attributes</li>
						</ul>
						<li>Grammars are a set of named rules</li>
						<ul>
							<li>rules define how an element is composed</li>
							<li>local definitions (nested specifications of content models) are possible</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Example</title>
				<slide>
					<title>DTD and XSDL</title>
					<listing src="document.dtd"/>
					<listing src="document.xsd"/>
				</slide>
				<slide>
					<title>RELAX NG</title>
					<listing src="document.rng"/>
				</slide>
				<slide id="relaxng-compact">
					<title>RELAX NG Compact Syntax</title>
					<listing src="document.rnc"/>
				</slide>
			</part>
		</part>
		<part id="dsdl">
			<title short="DSDL">Document Schema Definition Languages (DSDL)</title>
			<slide>
				<title>Modular Validation</title>
				<ul>
					<li>RELAX NG gained popularity as an XSDL alternative</li>
					<ul>
						<li>RELAX NG left useful functionality out of the language</li>
						<li><link href="schematron"/> appeared as a useful addition to schema languages</li>
					</ul>
					<li>Based on the idea of modular validation, DSDL was announced</li>
					<ul>
						<li>DSDL should define a set of complementary schema languages</li>
						<li>DSDL should also define a framework for applying these languages</li>
					</ul>
					<li>Development and support have been slow and disappointing</li>
					<ul>
						<li>RELAX NG and Schematron are successful</li>
						<li>all other parts of DSDL are undefined or underspecified</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>DSDL Master Plan</title>
				<ul>
					<li>DSDL is described as having the following parts</li>
					<ol>
						<li>DSDL Overview</li>
						<li><link href="relax-ng"/></li>
						<li><link href="schematron"/></li>
						<li><em>Namespace-based Validation Dispatching Language (NVDL)</em></li>
						<li><em>Data Type Library Language (DTLL)</em></li>
						<li>Path-based integrity constraints</li>
						<li><em>Character Repertoire Validation Language (CRVL)</em></li>
						<li><em>Document Schema Renaming Language (DSRL)</em></li>
					</ol>
					<li>It is unlikely that DSDL will succeed</li>
					<ul>
						<li>years of identical presentations and stalled developments</li>
						<li>ISO is not a good place for fast-paced technologies</li>
					</ul>
					<li>DSDL should be regarded as an <em>inspiration</em>, not as a <em>solution</em></li>
				</ul>
			</slide>
		</part>
		<part id="schematron">
			<title>Schematron</title>
			<slide>
				<title>XPath Again</title>
				<ul>
					<li>Schematron popularized XPath-based testing of XML documents</li>
					<ul>
						<li>the language is far from being well-designed</li>
						<li>it can be easily used to write down a number of XPath-based constraints</li>
						<li>it can be used as an inspiration to do a better job of XPath-based testing</li>
					</ul>
					<li>XPath makes it very easy to select parts of XML trees</li>
					<ul>
						<li>many XSLT programs contain some <q>validation</q> before processing</li>
						<li>validation and processing should be kept separate</li>
						<li>if validation is kept separate, there may be easier ways than XSLT</li>
					</ul>
					<li>Schematron has been built for human-oriented reporting</li>
					<ul>
						<li>Schematron outputs are text messages that humans should read</li>
						<li>machine-oriented validation requires different features</li>
						<li>integrating Schematron into machine-oriented pipelines requires some efforts</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Basics</title>
				<ul>
					<li>Schematron schemas can be regarded as scripts for XPath testing</li>
					<ul>
						<li><em>patterns</em> group together a set of task-oriented tests</li>
						<li><em>rules</em> define tests which have to be applied in a certain context</li>
						<li><em>assertions</em> are XPaths which are evaluated in a given context</li>
					</ul>
					<li>Schematron schemas in most cases do not cover the whole XML tree</li>
					<ul>
						<li>for the rules to work, the structural integrity should be validated first</li>
						<li>if the structure of the tree is valid, rules specify additional constraints</li>
						<li>Schematron is a <em>complement</em> to grammars, not a <em>replacement</em></li>
					</ul>
				</ul>
				<listing src="address.sch" line="2-9"/>
			</slide>
			<part>
				<title>Implementation</title>
				<slide>
					<title>Performing Validation</title>
					<ul>
						<li>Schema languages are declarative inputs for validation</li>
						<ul>
							<li>schema languages are not executable programming languages</li>
							<li>to perform validation, some software component must process documents and schemas</li>
						</ul>
						<li>Schema languages require supporting software</li>
						<ul>
							<li>DTDs are part of XML, so a validating XML processor must perform DTD validation</li>
							<li>XSDL is a separate specification, an XSDL processor is required</li>
						</ul>
						<li>Schematron is built around XPaths</li>
						<ul>
							<li>any technology supporting XPath evaluation would be a good foundation</li>
							<li>XSLT is a technology supporting XPath evaluation</li>
							<li>XSLT's program flow control is good enough to support Schematron</li>
							<li>XSLT processors are available for a large number of platforms</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XSLT-Generated XSLT</title>
					<ul>
						<li>XSLT uses XML as its syntax</li>
						<ul>
							<li>this is inconvenient because XSLT programs are very verbose</li>
							<li>processing XSLT with XSLT is supported very well</li>
							<li>for power users, the benefits outweigh the discomforts</li>
						</ul>
						<li>How is it possible to generate XSLT from XSLT?</li>
						<ul>
							<li>it is impossible to use literal result elements (they would be executed)</li>
							<li>it would be against XSLT's idea to write the resulting XSLT as text</li>
							<li>there must be a distinction between <q>executable</q> and <q>output</q> XSLT elements</li>
						</ul>
					</ul>
					<pre>&lt;xsl:template match="rule">
	<span style="color : red ; ">&lt;xsl:template match="{@context}"></span>
		&lt;xsl:apply-templates select="assert"/>
	<span style="color : red ; ">&lt;/xsl:template></span>
&lt;/xsl:template></pre>
				</slide>
				<slide>
					<title>XSLT-Based Schematron</title>
					<img src="schematron-xslt.png" style="width : 90% ; margin : 4% ; "/>
				</slide>
				<slide>
					<title>Compiling Assertions</title>
					<listing src="skeleton1-5.xsl" line="60-64"/>
					<listing src="skeleton1-5.xsl" line="67-67"/>
					<listing src="skeleton1-5.xsl" line="146-179"/>
				</slide>
				<slide>
					<title>Compiled Example</title>
					<listing src="address.xsl"/>
				</slide>
			</part>
			<part id="schematron-patterns">
				<title>Patterns</title>
				<slide>
					<title>Grouping Tests</title>
					<ul>
						<li>Patterns are containers for a set of <link href="schematron-rules"/></li>
						<ul>
							<li>patterns are used for representing goal-oriented parts of the validation</li>
							<li>achieving one goal may require checking within various contexts</li>
						</ul>
						<li>Patterns are described by a title and additional text</li>
						<ul>
							<li>Schematron is geared towards human users</li>
							<li>title and text are documentation only, they are never used for validation</li>
						</ul>
						<li>Patterns can be grouped by <em>phases</em> for different validation tasks</li>
						<ul>
							<li>patterns group a set of rules specific for one validation goal</li>
							<li>depending on the application, different validation phases may require different sets of patterns</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="schematron-rules">
				<title>Rules</title>
				<slide>
					<title>Setting the Context</title>
					<ul>
						<li>Setting the context is essential for XPath expressions</li>
						<ul>
							<li>within <link href="schematron-patterns"/>, rules group context-specific <link href="schematron-assertions"/></li>
							<li>assertion XPaths are evaluated relative to a rule's context</li>
						</ul>
						<li><em>Abstract rules</em> make it possible to reuse assertions</li>
						<ul>
							<li>abstract rules are not evaluated (they do not have a context)</li>
							<li>other rules may import assertions by <em>extending</em> an abstract rule</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="schematron-assertions">
				<title>Assertions</title>
				<slide id="schematron-assert">
					<title>Assertions with <code>assert</code></title>
					<ul>
						<li><code>assert</code> is used to specify assertions</li>
						<ul>
							<li>if the XPath evaluates to <code>false</code>, the assertion's content is output</li>
							<li>assertions are always evaluated as boolean (type casting will be applied)</li>
						</ul>
						<li>Assertion XPaths are evaluated relative to the containing rule's context</li>
						<ul>
							<li>moving an assertion from one rule to another will change its meaning</li>
						</ul>
						<li>XPath is not good for expressing grammar rules</li>
						<ul>
							<li>grammar checking should be left to grammar-oriented languages</li>
						</ul>
					</ul>
					<pre>&lt;!ELEMENT ENTRY (NAME, ADDRESS, PHONENUM+, EMAIL) ></pre>
					<pre>( count(NAME) = 1 and count(ADDRESS) = 1 and count(EMAIL) = 1 ) and ( NAME[following-sibling::ADDRESS] and ADDRESS[following-sibling::PHONENUM] and PHONENUM[following-sibling::EMAIL] ) and ( count(NAME|ADDRESS|PHONENUM|EMAIL) = count(*) )</pre>
				</slide>
				<slide id="schematron-report">
					<title>Assertions with <code>report</code></title>
					<ul>
						<li><code>report</code> is used to generate reports</li>
						<ul>
							<li>if the XPath evaluates to <code>true</code>, then the assertion's content is output</li>
							<li>assertions are always evaluated as boolean (type casting will be applied)</li>
						</ul>
						<li>Logically, <code>assert</code> and <code>report</code> are inverse</li>
						<ul>
							<li><code>assert</code> is used to test conformance (it outputs errors)</li>
							<li><code>report</code> is used to report observations (it outputs messages)</li>
							<li>Schematron's processing model is underspecified (check assertions, print outputs)</li>
						</ul>
						<li>Schematron is useful for reporting to humans</li>
						<ul>
							<li>machine-oriented environments need a better processing model</li>
							<li>using Schematron as a starting point could be a good approach</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Report Example</title>
					<listing src="address-report.sch" line="2-12"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Validation is Good</title>
				<ul>
					<li>Validation is better than writing code</li>
					<li>Validation should be seen as a pipeline process</li>
					<li>RELAX NG can be a useful and simple substitute for XSDL</li>
					<li>Schematron supports XPath-oriented constraints for XML documents</li>
				</ul>
			</slide>
		</part>
	</presentation>
    <presentation id="xquery-1">
        <title short="XQuery 1">XML Query (XQuery) – Part I</title>
        <date>2007-11-13</date>
        <toc class="resources"><a href="http://www.w3.org/TR/xquery" title="W3C XQuery Spec">Spec</a></toc>
        <toc class="abstract">The <em>XML Query (XQuery)</em> language has been designed to query collections of XML documents. It is thus different from XSLT, which primarily transforms one document at a time. However, the core of both languages is XPath 2.0, which means that learning XQuery (and XSLT 2.0) is not very hard when starting with a solid knowledge of XPath 2.0. XQuery's main concept is an expression language which supports iteration and binding of variables to intermediate results. The final result of an XQuery is a tree, which can be serialized in various serialization formats.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>XQuery in the XML Landscape</title>
			<slide>
				<title>XSLT using XPath</title>
				<img style="width : 86% ; margin : 4% ; " src="xslt-overview.png" title="XSLT Processing"/>
			</slide>
			<slide>
				<title>XQuery using XPath</title>
				<img style="width : 86% ; margin : 4% ; " src="xquery-overview-filesystem.png" title="File-based XQuery Processing"/>
			</slide>
			<slide>
				<title>XQuery using XPath + Database</title>
				<img style="width : 86% ; margin : 4% ; " src="xquery-overview-dbms.png" title="DB-based XQuery Processing"/>
			</slide>
		</part>
		<part id="xquery-expressions">
			<title>XQuery Expressions</title>
			<part>
				<title>Syntax Matters</title>
				<slide>
					<title>XML Syntax</title>
					<ul>
						<li>XML syntax is good (and bad …)</li>
						<ul>
							<li>XML is well-known and supported by many tools</li>
							<li>XML is verbose and hard to parse for humans</li>
						</ul>
						<li>XSLT's XML syntax is good (and bad …)</li>
						<ul>
							<li>it is easy to use for experienced XML users</li>
							<li>and a nightmare to get used to for XML beginners</li>
							<li>it can be easily fed to itself for advanced applications (meta-stylesheets)</li>
							<li>which only advanced XSLT programmers will ever do</li>
						</ul>
						<li>Even XSLT is not completely XMLized</li>
						<ul>
							<li>XSLT language constructs are XML elements and attributes</li>
							<li>XPath expressions use their own non-XML syntax (hard to process in XSLT)</li>
							<li>XSLT is a compromise between XML and non-XML</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XQuery Syntax</title>
					<ul>
						<li>XQuery should become the new query language for data</li>
						<ul>
							<li>some say for any data (because relational data can be represented as XML anyway)</li>
							<li>others say for XML data only</li>
							<li>either way, XQuery will become an important language</li>
						</ul>
						<li>XML syntax has been perceived as something people don't like</li>
						<ul>
							<li>there is no hard evidence that XML syntax really is an acceptance problem</li>
							<li>but many people think this might be the case for a query language</li>
							<li>non-XML syntaxes can be more compact than XML syntaxes</li>
						</ul>
						<li>Query language specialists are used to SQL's keyword-based syntax</li>
						<ul>
							<li>XQuery imitates this approach to make it easier to switch</li>
							<li>XML shows up anyway because XQuery constructs XML</li>
							<li>you can't really escape XML when you are working with XML …</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XQuery Syntax Example</title>
					<listing src="book-example.xq"/>
				</slide>
				<slide>
					<title>XQueryX Syntax Example</title>
					<listing src="book-example.xqx" line="8-34"/>
				</slide>
			</part>
			<part id="xquery-flwor">
				<title short="FLWOR">For Let Where Order Return (FLWOR)</title>
				<slide>
					<title>XPath and XQuery</title>
					<ul>
						<li>Every XPath expression is a valid XQuery expression</li>
						<ul>
							<li>if you are good at XPath, you can reuse a lot of your knowledge</li>
							<li>XQuery provides alternative expressions and missing functionality</li>
						</ul>
						<li>XPath is a language for selecting nodes in existing documents</li>
						<ul>
							<li>XPath has no language features to construct new XML trees</li>
							<li>re-ordering a tree (i.e., sorting) also involves constructing a new tree</li>
						</ul>
						<li>XPath is more relevant for XSLT/XQuery experts</li>
						<ul>
							<li>advanced XPath concepts will take you very far</li>
						</ul>
						<li>XQuery expressions can be used to simplify XPaths</li>
						<ul>
							<li>in the same way as in XSLT, there is a trade-off in complexity between the languages</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XPath <q>vs.</q> XQuery</title>
					<ul>
						<li>Choosing a certain style can be a question of taste</li>
					</ul>
					<pre>//reference[starts-with(date/@value, '2000')]/title</pre>
					<listing src="for-where-return.xq" line="3-5"/>
				</slide>
				<slide>
					<title>XQuery's Central Construct</title>
					<ul>
						<li>FLWOR is pronounced <q>flower</q> (it went through several syntax iterations)</li>
						<ul>
							<li>must have at least one <xq>for</xq> or <xq>let</xq></li>
							<li>may have <xq>where</xq> and <xq>order by</xq></li>
							<li>must have <xq>return</xq></li>
						</ul>
						<li>Iteration over a sequence of items</li>
						<ul>
							<li>similar to <xslte>for-each</xslte> or similar loop constructs</li>
							<li>works slightly differently because of XQuery's <link href="xquery-processing"/></li>
							<li><xq>where</xq> is comparable to XPath predicates</li>
						</ul>
					</ul>
				</slide>
				<slide id="xquery-for">
					<title>FLWOR – For</title>
					<ul>
						<li>Iteration over an input sequence</li>
						<ul>
							<li>for each item in the input sequence, a result is calculated</li>
							<li>the result of the <xq>for</xq> clause is the concatenation of all these results</li>
						</ul>
						<li>If there is only a <xq>return</xq>, it is the same as XPath's <link href="xpath20-iterations"/></li>
					</ul>
					<pre>for $i in (1 to 10) return $i * $i</pre>
					<listing src="for-return.xq"/>
				</slide>
				<slide id="xquery-let">
					<title>FLWOR – Let</title>
					<ul>
						<li>Declares a variable and assigns a value to it</li>
						<ul>
							<li>may use previously assigned variables</li>
							<li>can be used any number of times mixed in any order with <xq>for</xq> clauses</li>
						</ul>
					</ul>
					<listing src="let-return.xq"/>
					<listing src="for-let-return.xq"/>
				</slide>
				<slide>
					<title>For &amp; Let</title>
					<ul>
						<li>Variables in most cases are used for convenience</li>
						<ul>
							<li>they avoid repeating things and often result in a more structured query expression</li>
							<li>writing XQueries should not be an exercise in minimizing the character count</li>
						</ul>
						<li>For single items, <xq>for</xq> and <xq>let</xq> can be interchanged (handy for XPath)</li>
					</ul>
					<pre>let $x := 'whatever' return …</pre>
					<pre>for $x in 'whatever' return …</pre>
				</slide>
				<slide id="xquery-where">
					<title>FLWOR – Where</title>
					<ul>
						<li>Filtering the items from <xq>for</xq> and <xq>let</xq> clauses</li>
						<ul>
							<li>there may be at most one <xq>where</xq> clause in a FLWOR expression</li>
							<li>it has to appear after all <xq>for</xq> and <xq>let</xq> clauses</li>
						</ul>
						<li>A <xq>where</xq> clause almost always uses a variable</li>
						<ul>
							<li>they act as filters of the sequences generated by <xq>for</xq> clauses</li>
							<li>they act based on the value of the items in that sequence</li>
						</ul>
					</ul>
					<listing src="for-where-return.xq"/>
					<ul>
						<li>Predicates in XPath expressions do the exact same thing</li>
						<ul>
							<li>but predicates can only be applied to nodes</li>
							<li>SQL users find the <xq>where</xq> approach easier to understand</li>
						</ul>
					</ul>
				</slide>
				<slide id="xquery-order">
					<title>FLWOR – Order by</title>
					<ul>
						<li>FLWOR results are evaluated in the order resulting from the <xq>for</xq> clauses</li>
						<ul>
							<li>for nested clauses, this means a nested evaluation of <xq>for</xq> clauses</li>
							<li>this will often reflect the document order (depending on the <xq>for</xq> XPaths)</li>
						</ul>
						<li>Any order other than this has to be achieved by sorting</li>
						<ul>
							<li>multiple sort keys can be specified separated by commas</li>
							<li>sorting can be done <xq>ascending</xq> or <xq>descending</xq></li>
						</ul>
					</ul>
					<listing src="for-let-order-return.xq"/>
				</slide>
				<slide id="xquery-return">
					<title>FLWOR – Return</title>
					<ul>
						<li>Required in every FLWOR expression</li>
						<li>Constructs new nodes and fills them with values</li>
						<ul>
							<li>element constructors can be used to generate elements</li>
							<li>attribute constructors can be used to generate attributes</li>
						</ul>
					</ul>
					<listing src="for-let-order-return-attributes.xq"/>
				</slide>
			</part>
        </part>
        <part id="xquery-processing">
			<title>Processing Model</title>
			<slide id="xquery-tuple-stream">
				<title>Tuple Stream</title>
				<ul>
					<li>FLWOR expressions are the central expression producing results</li>
					<ul>
						<li>nested <xq>for</xq> loops turn sorting into a non-trivial task</li>
						<li>the concept of a <em>tuple stream</em> formally defines FLWOR evaluation</li>
					</ul>
					<li>Each FLWOR expression is generating a tuple stream</li>
					<ul>
						<li><xq>for</xq> and <xq>let</xq> generate a sequence of bound variables, the <em>tuple stream</em></li>
						<li><xq>where</xq> acts as a filter testing tuples against a constraint (discarding some tuples)</li>
						<li><xq>order by</xq> can be used to reorder the (possibly filtered) tuple stream</li>
						<li><xq>return</xq> is evaluated once for each tuple in the filtered, reordered stream</li>
						<li>the result of the expression is the concatenation of the results of all <xq>return</xq> evaluations</li>
					</ul>
				</ul>
			</slide>
			<slide id="xquery-serialization">
				<title>Serialization</title>
				<ul>
					<li>XQuery creates an <link href="xdm">XDM</link> instance as a result</li>
					<li>XDM instances are sequences of items</li>
					<ul>
						<li>items can be atomic values</li>
						<li>items can be nodes which are nested to form trees</li>
					</ul>
					<li>XQuery itself is not concerned with how to serialize XDM instances</li>
					<ul>
						<li>the language works on XDM and produces XDM</li>
					</ul>
					<li><a href="http://www.w3.org/TR/xslt-xquery-serialization/">XSLT 2.0 and XQuery 1.0 Serialization</a> defines the serialization of XDM instances</li>
					<ul>
						<li>the specification can be used for XSLT 2.0 and XQuery 1.0</li>
						<li>if other serializations are required, this does not affect the core standards</li>
						<li>currently supported output methods are <a href="http://www.w3.org/TR/xslt-xquery-serialization/#xml-output">XML</a>, <a href="http://www.w3.org/TR/xslt-xquery-serialization/#xhtml-output">XHTML</a>, <a href="http://www.w3.org/TR/xslt-xquery-serialization/#html-output">HTML</a>, and <a href="http://www.w3.org/TR/xslt-xquery-serialization/#text-output">Text</a></li>
					</ul>
					<li>Text output means that non-markup structures can be created directly</li>
				</ul>
			</slide>
		</part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Clearer Queries</title>
				<ul>
					<li>XPath is too condensed for many users</li>
					<li>XQuery provides a less compact and more easily accessible syntax</li>
					<li>XQuery provides features which are not part of XPath</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="xquery-2">
        <title short="XQuery 2">XML Query (XQuery) – Part II</title>
        <date>2007-11-15</date>
        <toc class="resources"><a href="http://www.ibm.com/developerworks/xml/library/x-wxxm34.html">XQuery/XSLT Comparison</a></toc>
        <toc class="abstract">XQuery has been built on top of XPath 2.0, which means it uses the same foundation as XSLT 2.0. Both languages have a large overlap, and according to personal preferences and the XML task, one language may be preferred over the other. Features such as <em>user-defined functions</em> and <em>schema-awareness</em> bring XQuery even closer to XSLT 2.0, making the decision to choose one over the other mostly a question of personal preference.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>XQuery &amp; XSLT</title>
			<slide>
				<title>XPath 2.0 vs. XSLT 2.0</title>
				<ul>
					<li><link href="xpath20">XPath 2.0</link> is powerful but incomplete</li>
					<ul>
						<li>cannot create nodes (only selection of existing nodes)</li>
						<li>does not support user-defined functions</li>
						<li>no support for grouping</li>
						<li>no support for sorting</li>
					</ul>
					<li><link href="xslt20-1">XSLT 2.0</link> extends XPath 2.0 with additional features</li>
					<li>XSLT 2.0 adds more features which were missing in <link href="xslt-1">XSLT 1.0</link></li>
					<ul>
						<li><link href="xslt20-analyze-string"><xslte>analyze-string</xslte></link> for processing strings</li>
						<li><xpath>format-date()</xpath> and <xpath>format-number()</xpath> for formatted values</li>
						<li><link href="xslt20-grouping"><xslte>for-each-group</xslte></link> for easy grouping and access to group information</li>
						<li><link href="xslt-keys"/> for optimizing and organizing access to nodes</li>
					</ul>
					<li>XSLT's <em>functional programming model</em> has nothing to do with XPath</li>
				</ul>
			</slide>
			<slide>
				<title>Turing Completeness</title>
				<ul>
					<li><link href="xslt20-1">XSLT 2.0</link> and XQuery are both <a href="http://en.wikipedia.org/wiki/Turing-complete">Turing complete</a></li>
					<ul>
						<li>any algorithm computing output from input can be implemented</li>
						<li>Turing completeness says nothing about how <em>appropriate</em> a language is for a task</li>
					</ul>
					<li>Choosing between XSLT 2.0 and XQuery is a matter of taste and politics</li>
					<ul>
						<li>the <em>functional style</em> of XSLT is hard to get used to</li>
						<li>most people feel more comfortable with the more traditional design of XQuery</li>
						<li>providing and using one of the languages is a long-term decision</li>
					</ul>
					<li>Choosing between XSLT 2.0 and XQuery also depends on the task</li>
					<ul>
						<li>simple extraction of XML content is mostly XPath 2.0 anyway</li>
						<li>generating rigid schemas from XML sources works well in both languages</li>
						<li>document processing (<link href="mixed-content"/>) is better done with XSLT 2.0</li>
						<li>large scale data processing is better done with XQuery</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XSLT Assumptions</title>
				<ul>
					<li>Transformations process the complete input</li>
					<ul>
						<li>the <link href="xslt-processing-model"/> works well for processing most nodes</li>
						<li><link href="xslt-builtin"/> can be used for applying default transformations</li>
					</ul>
					<li>XSLT users are highly skilled in XML</li>
					<ul>
						<li>XSLT's XML syntax is hard to learn and use for non-XML experts</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XQuery Assumptions</title>
				<ul>
					<li>Queries only extract selected parts of the input</li>
					<ul>
						<li>there is no default behavior</li>
						<li>understanding the control flow in XQuery is much easier than in XSLT</li>
					</ul>
					<li>XQuery authors are not necessarily XML experts</li>
					<ul>
						<li>input and output in many cases will be XML</li>
						<li>the language syntax should be as human-oriented as possible</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XSLT BlogXML Processor</title>
				<listing src="blog2html.xsl"/>
			</slide>
			<slide>
				<title>XQuery BlogXML Processor</title>
				<listing src="blog2html.xq" line="3-18"/>
			</slide>
			<slide>
				<title>2-Step vs. 1-Step</title>
				<ul>
					<li>Web applications often are built as <em>multi-tier applications</em></li>
					<li>REST services provide the interface to the data</li>
					<ul>
						<li>they provide access to resources which are managed by the service</li>
						<li>in many cases the data representation is some XML format</li>
					</ul>
					<li>Interfaces then use that data in various ways</li>
					<ul>
						<li>UIs may present it in HTML</li>
						<li>APIs may present it in some API-specific way (e.g., import/export)</li>
					</ul>
					<li>XQuery is well-suited to retrieve data from an XDBMS</li>
					<li>XSLT is well-suited to transform data to a specific presentation format</li>
				</ul>
			</slide>
			<slide>
				<title>Web App with XQuery &amp; XSLT</title>
				<img style="width : 88% ; margin : 4% ; " src="web-app-tiers.png" title="Web Application Tiers with XQuery and XSLT"/>
			</slide>
		</part>
		<part>
			<title>More XQuery Details</title>
			<slide>
				<title>Comma Operator</title>
				<ul>
					<li>Constructing sequences from separate expressions</li>
					<pre>&lt;a>123&lt;/a>, &lt;b>456&lt;/b></pre>
					<pre>doc("books.xml")//author, doc("books.xml")//title</pre>
					<li>Parentheses must be used to group expressions correctly</li>
					<pre>for $b in doc("books.xml")//book return   $b/title, <span style="color : red ; ">$b/author</span></pre>
					<pre>for $b in doc("books.xml")//book return ( $b/title, $b/author )</pre>
				</ul>
			</slide>
			<slide>
				<title>Whitespace in XQuery</title>
				<ul>
					<li>XQuery distinguishes three kinds of whitespace</li>
					<ul>
						<li><em>ignorable whitespace</em> occurring between syntax terminals (e.g., keywords)</li>
						<li><em>boundary whitespace</em> occurring between tags and/or enclosed expressions</li>
						<li><em>literal whitespace</em> occurring inside literals</li>
					</ul>
					<li><xq>boundary-space</xq> controls the interpretation of boundary whitespace</li>
					<ul>
						<li><xq>strip</xq> declares boundary whitespace to be insignificant</li>
						<li><xq>preserve</xq> declares boundary whitespace to be significant</li>
					</ul>
				</ul>
				<pre>declare boundary-space strip;
let $a := "Bob Glushko"
return
  &lt;book>
    &lt;title>Document Engineering&lt;/title>
    &lt;author> { $a } &lt;/author>
  &lt;/book></pre>
			</slide>
			<slide>
				<title>Constructors</title>
				<ul>
					<li>XQuery allows literal XML or <em>computed constructors</em></li>
					<li>Literal XML are called <em>direct constructors</em></li>
					<pre>&lt;book>
  &lt;title>Document Engineering&lt;/title>
  &lt;author>Bob Glushko&lt;/author>
  &lt;price currency="USD">29.99&lt;/price>
&lt;/book></pre>
					<li><em>Computed Constructors</em> can be used for calculating names</li>
					<pre>element "book" {
  element "title" { "Document Engineering" },
  element "author" { "Bob Glushko" },
  element "price" {
    attribute "currency" { "USD" },
    29.99
  }
}</pre>
					<li>XSLT has the same concepts (<link href="xslt-literal"/> and <link href="xslt-element"><xslte>element</xslte></link>)</li>
				</ul>
			</slide>
		</part>
		<part id="xquery-functions">
			<title>User-Defined Functions</title>
			<slide>
				<title>Reusable code in XQuery</title>
				<ul>
					<li>Named units of XQuery code</li>
					<li>Zero or more input parameters (type information is optional)</li>
					<li>Produces a result (type information is optional)</li>
					<li>Function body is an XQuery expression</li>
					<ul>
						<li>there is no explicit return statement</li>
						<li>the <xq>return</xq> of a <link href="xquery-flwor">FLWOR expression</link> may take over that role</li>
					</ul>
					<li>A predefined <xq>local</xq> namespace prefix avoids namespace inflation</li>
				</ul>
				<pre>declare function local:onetwothree() as xs:integer+ { (1, 2, 3) };</pre>
			</slide>
			<slide>
				<title>Local Function</title>
				<listing src="blog2html.xq"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XQuery vs. XSLT</title>
				<ul>
					<li>XQuery is a complete programming language</li>
					<li>XSLT can do everything XQuery can and vice versa</li>
					<li>Choosing one over the other depends on task and taste</li>
					<li>REST scenarios have good places for both languages</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="xdbms">
        <title short="XDBMS">XML Databases</title>
        <date>2007-11-20</date>
        <toc class="resources"><a href="http://www-128.ibm.com/developerworks/library/x-mxd5/" title="Managing XML data: eXist: An open source native XML database">eXist</a></toc>
        <toc class="abstract"><em>XML Databases</em> are specialized databases for handling XML data. As their query language, they will often use XQuery, but they need additional technologies for updating and storing data. XQuery currently is a read-only language, so update facilities must be provided as an addition to XQuery querying capabilities. One of the big advantages of databases vs. file systems is their optimized storage (and thus access) structures, and in the case of XML databases this means storing XML documents other than as text files.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Files vs. Databases</title>
			<slide>
				<title>Abstraction Layers</title>
				<ul>
					<li>File systems are general-purpose mechanisms for managing data</li>
					<ul>
						<li>files may contain any data that can be encoded as a sequence of bytes</li>
						<li>file systems maintain some metadata about files (owner, dates, permissions)</li>
						<li>data management is limited to reading or writing streams of bytes</li>
					</ul>
					<li>Databases are specialized tools for managing data</li>
					<ul>
						<li>they prescribe a logical model which defines the type of data to work with</li>
						<li>they provide operations <em>on this logical model only</em> (and not on the physical model)</li>
						<li>the physical model can be optimized to provide better performance/security/reliability</li>
						<li>the physical model can be stored in files or as raw data without a file system</li>
					</ul>
					<li>Relational databases (RDBMS) use tables as their logical model</li>
					<li>XML databases (XDBMS) use <link href="xdm">XDM</link> (<q>typed Infosets</q>) as their document model</li>
				</ul>
			</slide>
			<slide>
				<title>File-Based XQuery</title>
				<img style="width : 86% ; margin : 4% ; " src="xquery-overview-filesystem.png" title="File-based XQuery Processing"/>
			</slide>
			<slide>
				<title>Database-Based XQuery</title>
				<img style="width : 86% ; margin : 4% ; " src="xquery-overview-dbms.png" title="DB-based XQuery Processing"/>
			</slide>
		</part>
		<part>
			<title>XML Storage and Retrieval</title>
			<slide>
				<title>Database Management</title>
				<ul>
					<li>Databases are optimized data management systems</li>
					<ul>
						<li>data must be structured according to the <em>Data Definition Language (DDL)</em></li>
						<li>it can only be manipulated using the <em>Data Manipulation Language (DML)</em></li>
						<li>DDL and DML allow databases to implement optimized storage and retrieval</li>
					</ul>
					<li>XML is a new DDL, and relational databases cannot handle XML natively</li>
					<ul>
						<li>relational databases <link href="xml+dbms">try to adapt to the new world of XML data</link></li>
						<li>XML databases must implement their own optimized storage and retrieval</li>
					</ul>
					<li>XML documents do not have to be stored as text-based XML document files</li>
					<ul>
						<li>XML is the <em>data model</em> an application expects when working with XML</li>
						<li><em>XML storage</em> can be optimized for various purposes, one example is <em>Persistent DOM (PDOM)</em></li>
						<li>database data structures always depend on the expected <em>write vs. read</em> ratio</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Level-Order Numbering</title>
				<img style="width : 86% ; margin : 4% ; " src="exist-index-old.png" href="http://exist.sourceforge.net/xmlprague06.html#N1014E" title="eXist 1.0 Indexing Scheme"/>
			</slide>
			<slide>
				<title>Dewey Decimal Classification (DDC)</title>
				<img style="width : 86% ; margin : 4% ; " src="exist-index-new.png" href="http://exist.sourceforge.net/xmlprague06.html#N10172" title="eXist 1.1 Indexing Scheme"/>
			</slide>
			<slide>
				<title>Middle Insert with DDC</title>
				<img style="width : 86% ; margin : 4% ; " src="xdbms-middle-insert.png"/>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/boe04">T. Böhme, E. Rahm, <em>Supporting Efficient Streaming and Insertion of XML Data in RDBMS</em>, Proceedings of DIWeb 2004, June 2004</a></p>
			</slide>
			<slide>
				<title>Dynamic Level Numbers (DLN)</title>
				<img style="width : 86% ; margin : 4% ; " src="xdbms-dln.png"/>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/boe04">T. Böhme, E. Rahm, <em>Supporting Efficient Streaming and Insertion of XML Data in RDBMS</em>, Proceedings of DIWeb 2004, June 2004</a></p>
			</slide>
		</part>
		<part id="xdbms-access">
			<title>XDBMS Access</title>
			<slide id="jdbc">
				<title>JDBC</title>
				<ul>
					<li>Database systems are stand-alone applications</li>
					<ul>
						<li>they provide the service of storing, querying, and updating data</li>
						<li>they are often accessed from various applications in an IT landscape</li>
					</ul>
					<li>JDBC is the standard Java technology to connect to a database</li>
					<ul>
						<li>JDBC allows standardized access from Java programs to relational databases</li>
						<li>database vendors provide a JDBC driver for their database product</li>
					</ul>
					<li>JDBC accepts SQL statements and sends them to the database system</li>
					<ul>
						<li><code>SELECT</code> returns a row result set, i.e. the number of rows generated by the query</li>
						<li><code>INSERT</code>, <code>UPDATE</code>, and <code>DELETE</code> return a simple count (database rows affected)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>JDBC Type 1 &amp; 2</title>
				<table width ="90%">
					<tr>
						<td align ="center" valign ="center">
							<img width="70%" src ="jdbc_type_1.png" title="JDBC Type 1" longdesc="http://en.wikipedia.org/wiki/JDBC_driver"/>
						</td>
						<td align ="center" valign ="center">
							<img width="70%" src ="jdbc_type_2.png" title="JDBC Type 2" longdesc="http://en.wikipedia.org/wiki/JDBC_driver"/>
						</td>
					</tr>
				</table>			
			</slide>
			<slide>
				<title>JDBC Type 3 &amp; 4</title>
				<table width ="90%">
					<tr>
						<td align ="center" valign ="center">
							<img width="70%" src ="jdbc_type_3.png" title="JDBC Type 3" longdesc="http://en.wikipedia.org/wiki/JDBC_driver"/>
						</td>
						<td align ="center" valign ="center">
							<img width="70%" src ="jdbc_type_4.png" title="JDBC Type 4" longdesc="http://en.wikipedia.org/wiki/JDBC_driver"/>
						</td>
					</tr>
				</table>			
			</slide>
			<slide id="xapi">
				<title>XML:DB API (XAPI)</title>
				<ul>
					<li><a href="http://xmldb-org.sourceforge.net/">XML:DB</a> was an initiative of XDBMS providers and supporters</li>
					<ul>
						<li>it was founded when XDBMS was not a mainstream concept</li>
						<li>none of the big players ever participated in the group</li>
						<li>no longer active and some of its <a href="http://xmldb-org.sourceforge.net/credits.html">members</a> have already disappeared</li>
					</ul>
					<li>XML:DB has published some influential draft documents</li>
					<ul>
						<li>XML:DB API (XAPI) was their proposal for what an XDBMS API could look like</li>
						<li><link href="xupdate"/> was their proposal for an update language for XDBMS</li>
					</ul>
					<li>The latest <a href="http://xmldb-org.sourceforge.net/xapi/xapi-draft.html">XAPI Draft</a> is dated 09/2001</li>
					<ul>
						<li>it uses XPath as the <q>query language</q></li>
						<li>it uses XUpdate as the update language</li>
						<li>it predates <link href="xdm">XDM</link> and <link href="xquery-1">XQuery</link>, the two essential XDBMS technologies today</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Database Connection</title>
				<ul>
					<li>Currently there is no <q>JDBC</q> for XDBMS</li>
					<ul>
						<li>JDBC is not applicable, <link href="xapi">XAPI</link> is outdated</li>
						<li>JDBC may be updated (IBM prefers this approach over XQJ)</li>
					</ul>
					<li><a href="http://dret.net/biblio/reference/jsr225">XQuery for Java (XQJ)</a> is in the pipeline of the <em>Java Community Process (JCP)</em></li>
					<ul>
						<li>it is still in draft stage (version 0.9 public review) and may change</li>
						<li>it is in development since 06/2003 and the main concepts seem to be stable now</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XQJ Example</title>
				<pre>// establish a connection to the XQuery engine
XQConnection conn = xqds.getConnection();
// create an expression object that is later used to execute an XQuery expression
XQExpression expr = conn.createExpression();
// The XQuery expression to be executed
String es = "for $n in fn:doc('catalog.xml')//item" + " return fn:data($n/name)";
// execute the XQuery expression
XQResultSequence result = expr.executeQuery(es);
// process the result (sequence) iteratively
while (result.next()) {
	// retrieve the current item of the sequence as a String
	String str = result.getAtomicValue();
	System.out.println("Product name: " + str);
}
// free all resources allocated for the result
result.close();
// free all resources allocated for the expression
expr.close();
// free all resources allocated for the connection
conn.close();</pre>
			</slide>
			<slide>
				<title>HTTP Access</title>
				<ul>
					<li>XDBMS access can be regarded as a <a href="../web-fall07/rest">REST service</a></li>
					<ul>
						<li>resource formats are whatever the database manages, but always XML</li>
						<li>the usual CRUD operations of databases can be mapped to HTTP methods</li>
					</ul>
					<li>HTTP access provides an elegant and flexible interface</li>
					<ul>
						<li>database access can be managed in the same way as any other Web-based service</li>
						<li>many applications have access to the database system</li>
					</ul>
					<li>HTTP database access is not appropriate for all scenarios</li>
					<ul>
						<li>high-throughput, high-volume applications need a better optimized solution</li>
						<li>plain HTTP access does not provide support for transactions or security</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xdbms-update">
			<title>Updating XDBMS</title>
			<slide>
				<title>XQuery</title>
				<ul>
					<li><link href="xquery-1">XQuery</link> is a read-only language</li>
					<ul>
						<li>queries a collection of XML documents (<link href="xdm">XDM</link> instances)</li>
						<li>returns an XDM instance, serialized as XML or something else</li>
					</ul>
					<li>Updating XML databases currently is not covered by a widely accepted standard</li>
					<ul>
						<li><link href="xupdate"/> is a simple and rather old solution (04/2000)</li>
						<li>various <link href="xquery-update-extensions"/> have been proposed for XQuery</li>
						<li>the W3C is working on an <a href="http://www.w3.org/TR/xquery-update-10/"><em>XQuery Update Facility</em></a>, but this will not be finished for some time</li>
						<li>XML database implementers often introduce proprietary update facilities</li>
					</ul>
				</ul>
			</slide>
			<slide id="xupdate">
				<title>XUpdate</title>
				<ul>
					<li><a href="http://xmldb-org.sourceforge.net/xupdate/xupdate-wd.html">XUpdate</a> defines a language for specifying XML updates</li>
					<ul>
						<li>the data model is based on XPath 1.0</li>
						<li>the syntax is based on XML</li>
					</ul>
					<li>XUpdate has no connections with a query language, it is for updates only</li>
					<pre><![CDATA[<addresses version="1.0"> 
  <address id="1"> 
    <fullname>Andreas Laux</fullname> 
    <born day='1' month='12' year='1978'/> 
  </address> 
</addresses>]]></pre>
				<pre><![CDATA[<xupdate:modifications version="1.0" xmlns:xupdate="http://www.xmldb.org/xupdate"> 
  <xupdate:insert-after select="/addresses/address[1]" > 
    <xupdate:element name="address">
      <xupdate:attribute name="id">2</xupdate:attribute>
      <fullname>Lars Martin</fullname> 
      <born day='2' month='12' year='1974'/> 
    </xupdate:element> 
  </xupdate:insert-after> 
</xupdate:modifications>]]></pre>
				</ul>
			</slide>
			<slide id="xquery-update-extensions">
				<title>XQuery Update Extensions</title>
				<ul>
					<li><link href="xquery-1">XQuery 1.0</link> had been planned to be a read-only language</li>
					<ul>
						<li>creating a fully functional language would have been too ambitious</li>
						<li>with a solid formal foundation, XQuery can be upgraded to also provide update features</li>
					</ul>
					<li>Several XQuery update extensions have been proposed</li>
					<ul>
						<li>updating goes through a consolidation phase similar to querying</li>
						<li>the eventual XQuery update facility will be integrated with XPath</li>
					</ul>
					<li>W3C's <a href="http://www.w3.org/TR/xqupdate/">XQuery Update Facility</a> is in early draft status</li>
				</ul>
				<pre>do insert &lt;year>2005&lt;/year> after fn:doc("bib.xml")/books/book[1]/publisher</pre>
				<pre>do delete fn:doc("bib.xml")/books/book[1]/author[last()]</pre>
				<pre>do replace fn:doc("bib.xml")/books/book[1]/publisher with fn:doc("bib.xml")/books/book[2]/publisher</pre>
			</slide>
		</part>
        <part id="exist">
			<title>eXist</title>
			<slide>
				<title>Java XDBMS</title>
				<ul>
					<li>Java-based XML database with its own data storage</li>
					<ul>
						<li>not the fastest implementation choice</li>
						<li>can be run on any platform providing a 1.4 JRE</li>
					</ul>
					<li>Usable as standalone or embedded in Cocoon</li>
					<ul>
						<li>standalone provides HTTP access and is usable by any application</li>
						<li>embedded integrates eXist into Cocoon and turns Cocoon into a <em>Content Management System (CMS)</em></li>
					</ul>
					<li>Management through Java client or Web-based management console</li>
					<ul>
						<li>Java client is the older management tool and has more features</li>
						<li>Web-based management tool is work in progress and has some (Web-specific) limitations</li>
					</ul>
					<li>XML documents are stored in a proprietary format</li>
					<ul>
						<li>structurally indexed trees of the XML document</li>
						<li>additional <link href="indexing">indices</link> can be managed to enable faster querying</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Collections</title>
				<ul>
					<li>XML databases can store and retrieve XML documents</li>
					<ul>
						<li>in file systems, directories are used to organize the storage of files</li>
						<li>many XML databases use <em>collections</em> to organize the storage of XML documents</li>
					</ul>
					<li>Like directories, collections can be nested</li>
					<ul>
						<li>Queries affect all documents of a collection</li>
					</ul>
				</ul>
			</slide>
			<slide id="indexing">
				<title>Indexing</title>
				<ul>
					<li>Indexing makes XQueries much more efficient</li>
					<ul>
						<li>DBMS indices cost storage space and are expensive to update</li>
						<li>DBMS indices can be used for faster query processing</li>
					</ul>
					<li>eXist has three kinds of indices</li>
					<ul>
						<li><em>Structural Index</em> for the document structures (cannot be modified by users)</li>
						<li><em>Fulltext Index</em> indexes the content of elements and attributes (can be disabled)</li>
						<li><em>Range Index</em> for type-specific indexing of specific elements or attributes (must be enabled)</li>
					</ul>
					<li>Disabling fulltext indexing in eXist can yield surprising results</li>
					<ul>
						<li>rather than searching the collection, the database returns an empty result</li>
						<li>XQuery execution depends on fulltext indices (not only in terms of performance)</li>
					</ul>
				</ul>
			</slide>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>XML Collections</title>
				<ul>
					<li>XDBMS are useful for managing large document collections</li>
					<li>Managing a DBMS requires a minimum amount of expertise</li>
					<li>Performance benefits can be tremendous</li>
					<li>XDBMS are still in their early years, expect some surprises …</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="xml+dbms">
        <title short="XML &amp; DBMS">XML and Databases</title>
        <date>2007-12-04</date>
        <toc class="resources"><a href="http://www.rpbourret.com/xml/XMLAndDatabases.htm" title="Ronald Bourret's XML and Databases FAQ">FAQ</a></toc>
        <toc class="abstract">While XML databases are a good solution for managing XML content, frequently it is necessary to use non-XML databases for managing XML content. In most cases, these databases will be relational databases. There are two major approaches of how to manage XML content in a relational database. The first approach is to define a mapping between XML and relational structures and work with this mapping. The second approach is to use the XML-specific functionality, which is increasingly provided by relational databases, turning them into <em>XML-aware databases</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
 		<slide>
			<title>XML is Trees</title>
			<ul>
				<li>XML documents are trees</li>
				<ul>
					<li>applications may have different internal data models (mapped to trees for interfacing)</li>
					<li>the exchange and processing of XML documents is tree-based</li>
				</ul>
				<li>Where and how is XML being used?</li>
				<ul>
					<li>as a pure transfer syntax (Web Services very often are used like this)</li>
					<li>as artifacts that have a longer lifespan (archiving of XML business documents)</li>
					<li>as the application's data model (there is nothing but XML)</li>
				</ul>
				<li>XML usage results in very different requirements for XML tools</li>
				<ul>
					<li>Web Service programmers often never see the tree</li>
					<li>archived XML documents need to be searchable</li>
					<li>XML-centric applications need to store XML efficiently</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Storing XML</title>
			<ul>
				<li>XML documents are text files</li>
				<ul>
					<li>they can be stored in file systems (they are <q>self-describing</q>)</li>
					<li>they can be retrieved by searching through the file system</li>
				</ul>
				<li>File systems are not designed to store millions of documents</li>
				<ul>
					<li>standard file system implementations usually slow down dramatically</li>
					<li>standard procedures (backup/restore/concurrency) do not work well</li>
				</ul>
				<li>Problems with <q>File Systems as XML Databases</q></li>
				<ul>
					<li>the number of documents is too large</li>
					<li>there is no structured access (<a href="http://sourceforge.net/projects/xpsh">XPath Shell (XPsh)</a> provides an <q>XML-<code>find/grep</code></q>)</li>
					<li>there is no access optimization (XPsh is very slow)</li>
				</ul>
			</ul>
		</slide>
		<part id="rdbms">
			<title>Relational Databases</title>
			<slide>
				<title>Generic XML Storage</title>
				<ul>
					<li>Relational databases are the state of the art since 1976</li>
					<ul>
						<li>this is long enough to build highly optimized and robust systems</li>
						<li>this is long enough to have ER hard-wired into some brains</li>
					</ul>
					<li>XML is more powerful than ER</li>
					<ul>
						<li>repetitions of elements do not map well</li>
						<li>choices do not map well</li>
						<li>ordered content does not map well</li>
						<li>mixed content does not map well</li>
					</ul>
					<li>Storing XML in a relational database is hard</li>
					<ul>
						<li>it can be done by piggybacking structural information as content</li>
						<li>using the resulting structures is awkward and very inefficient</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Tree Table</title>
				<table width="90%" style="margin : 4%">
					<tr>
						<td valign="middle" align="center">
							<img style="width : 90% ; margin : 2% ; " src="tree-table.png"/>
						</td>
						<td valign="middle" align="center">
							<table style="width : 90% ; " border="1">
								<tr>
									<th>ID</th>
									<th>Type</th>
									<th>Name</th>
									<th>Value</th>
									<th>Parent</th>
									<th>Left</th>
								</tr>
								<tr>
									<td>1</td>
									<td>Root</td>
									<td></td>
									<td></td>
									<td></td>
									<td></td>
								</tr>
								<tr>
									<td>2</td>
									<td>Element</td>
									<td>a</td>
									<td></td>
									<td>1</td>
									<td></td>
								</tr>
								<tr>
									<td>3</td>
									<td>Element</td>
									<td>b</td>
									<td></td>
									<td>2</td>
									<td></td>
								</tr>
								<tr>
									<td>4</td>
									<td>Element</td>
									<td>c</td>
									<td></td>
									<td>2</td>
									<td>3</td>
								</tr>
								<tr>
									<td>5</td>
									<td>Text</td>
									<td></td>
									<td><q>Text</q></td>
									<td>3</td>
									<td></td>
								</tr>
								<tr>
									<td>6</td>
									<td>Attribute</td>
									<td>att</td>
									<td><q>42</q></td>
									<td>4</td>
									<td></td>
								</tr>
							</table>
						</td>
					</tr>
				</table>
			</slide>
		</part>
		<part>
			<title>Database Support for XML</title>
			<slide>
				<title>Why XML and Databases?</title>
				<ul>
					<li>XML is constantly getting more popular</li>
					<ul>
						<li>XML as a document format was first used as <em>wire format</em> only</li>
						<li>instead of parsing manually, parser interfaces provide better XML support</li>
						<li><em>data binding frameworks</em> bind XML even more tightly into applications</li>
						<li>if all programs somehow <q>hide the XML</q>, why not work on XML directly?</li>
					</ul>
					<li>What is XML for an application?</li>
					<ul>
						<li>an (increasingly popular) way to represent the data?</li>
						<li>the data itself?</li>
						<li>currently, the representation perspective is more popular</li>
						<li>as XML is increasingly penetrating applications, this may change</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML Interchange</title>
				<img style="height : 75% ; margin : 2% ; " src="xml-dbms-application.png"/>
			</slide>
			<slide id="xdbms-dbms-xmlsupport">
				<title>XML Support in DBMS</title>
				<img style="height : 75% ; margin : 2% ; " src="xml-dbms-xmlsupport.png"/>
			</slide>
			<slide>
				<title>XML DBMS</title>
				<img style="height : 75% ; margin : 2% ; " src="xml-dbms-xdbms.png"/>
			</slide>
		</part>
		<part>
			<title>XML Storage in Databases</title>
			<slide>
				<title>Model Mapping</title>
				<ul>
					<li>Relational databases are not good tools for storing XML</li>
					<ul>
						<li>they might be appropriate if the schema disallows problematic constructs</li>
						<li>they often are already deployed and applications must live with them</li>
					</ul>
					<li>If the data model is ER-oriented, relational databases are good tools</li>
					<ul>
						<li>XML may be invisible from the model point of view</li>
						<li>parts of the model may be encoded as an XML schema</li>
					</ul>
					<li>If the XML is not visible in the model, it can be structurally inaccessible</li>
					<ul>
						<li>e.g., a product catalog may contain product descriptions in XHTML rich text snippets</li>
						<li>for managing the product catalog data, the XHTML is not relevant</li>
					</ul>
					<li>If the XML is part of the model, it should be accessible structurally</li>
					<ul>
						<li>if the product catalog XHTML contains links to other products, these links are important</li>
						<li>they could be extracted (creating redundant and hard to maintain data)</li>
						<li>if they are hidden in the XHTML, all XHTML snippets have to be parsed</li>
						<li>ideally, the database should be able to <q>query the XHTML snippet</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML is Text</title>
				<ul>
					<li>XML documents can be stored as text</li>
					<ul>
						<li>databases typically have various datatypes for text storage</li>
						<li>if the database supports Unicode, any XML document can be stored</li>
					</ul>
					<li>The XML structure is completely invisible to the database</li>
					<ul>
						<li>working with the XML requires querying and parsing the XML text</li>
						<li>this kind of storage does not allow any querying of the XML content</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML → ∗LOB</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-lob.png"/>
			</slide>
			<slide id="xdbms-xmldatatype">
				<title>XML as a Datatype</title>
				<ul>
					<li>SQL supports a wide variety of datatypes</li>
					<ul>
						<li>typed values are better than untyped values (they enable type-specific operations)</li>
						<li>XML can be regarded as just another data type</li>
					</ul>
					<li>Introducing a datatype lets the database recognize the data</li>
					<ul>
						<li>XML data can be stored in some format (a <q>persistent DOM</q>)</li>
						<li>databases can provide functionality avoiding parsing/serialization (DOM-based)</li>
					</ul>
				</ul>
			</slide>
			<slide id="xdbms-xmltype">
				<title>XML Datatype</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-datatype.png"/>
			</slide>
			<slide>
				<title>Mapping XML to Models</title>
				<ul>
					<li>Model-relevant data must be mapped to the database structures</li>
					<ul>
						<li>this assumes there is an ER-model which describes the database structure</li>
						<li>mapping XML is easy by definition because the XML is ER-compliant</li>
					</ul>
					<li>Is the data accessed as table data?</li>
					<ul>
						<li>if shredded data is only used to assemble it again, it is just performance overhead</li>
						<li>if shredded data is accessed relationally, then shredding makes sense</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Shredding (XML → Columns)</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-shredding.png"/>
			</slide>
			<slide>
				<title>XML as First-Class Citizen</title>
				<ul>
					<li>The <link href="xdbms-xmltype"/> defines XML as a sub-concept of ER</li>
					<ul>
						<li>the overall structure of the database is relational</li>
						<li>attributes may be of type XML, which means storing trees in tables</li>
					</ul>
					<li>Tables are not the only way to see the world</li>
					<ul>
						<li>XML trees are an <em>alternative</em> to tables, not a <em>datatype</em></li>
						<li>XML-centric applications should not be forced to use tables at all</li>
					</ul>
					<li>XML can be regarded as replacing the ER-concept altogether</li>
					<ul>
						<li>the database simply stores XML documents</li>
						<li>applications can store, query, update, and manage XML documents in the database</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML DBMS</title>
				<img style="width : 90% ; margin : 2% ; " src="xml-storage-xdbms.png"/>
			</slide>
		</part>
		<part>
			<title>XML in Relational Databases</title>
			<slide>
				<title>RDBish XML</title>
				<ul>
					<li>XML schemas can be designed with databases in mind</li>
					<ul>
						<li>avoid unbounded repetitions of elements</li>
						<li>avoid choices</li>
						<li>avoid ordered content</li>
						<li>avoid mixed content</li>
					</ul>
					<li>Many XML schemas are designed RDBish for compatibility reasons</li>
					<ul>
						<li>it was decided that the XML should enable an easy mapping to relational structures</li>
						<li>the person designing the schema has an ER-structured brain</li>
						<li>the schema has been generated from a relational database schema</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Problematic XML</title>
				<ul>
					<li>XML in its full glory is too much for tables</li>
					<ul>
						<li>XML has been developed as a document format</li>
						<li>XML is about hierarchies (which <em>intentionally</em> have been left out of ER)</li>
						<li>XML is about highly irregular structures</li>
					</ul>
					<li>XML often is said to have two <q>flavors</q></li>
					<ul>
						<li><em>data-oriented XML</em>: regular data which can be easily mapped to tables</li>
						<li><em>document-oriented XML</em>: irregular structures which are hard to map to tables</li>
						<li>real-world XML often is a bit of both (e.g., <em>content</em> and <em>metadata</em>)</li>
					</ul>
					<li>Hybrid approaches sometimes are a good solution</li>
					<ul>
						<li>data-oriented can be shredded and stored in tables</li>
						<li>the document-oriented rest is stored as one object (text or <code>XML</code>)</li>
					</ul>
				</ul>
			</slide>
			<part id="sqlxml">
				<title>SQL/XML</title>
				<slide>
					<title>SQL/XML:2003</title>
					<ul>
						<li>SQL/XML provides <link href="xdbms-dbms-xmlsupport"/>s</li>
						<ul>
							<li>it introduces <link href="xdbms-xmldatatype"/></li>
							<li>it introduces a number of operations for generating XML from query results</li>
							<li>it defines mappings to bridge both worlds (SQL and XML)</li>
						</ul>
						<li>SQL/XML does not change anything about the database model</li>
						<ul>
							<li>data is still stored in tables only</li>
							<li>a column of a table may use the <code>XML</code> type</li>
							<li>queries may return results in XML rather than as SQL result sets</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>SQL/XML Example</title>
					<pre>SELECT
  e.EmpId,
  e.FirstName,
  e.LastName,
  e.StartDate,
  e.EndDate
FROM Employees e WHERE e.EmpId = 12</pre>
				<pre>SELECT
  XMLELEMENT(NAME "employee",
    XMLATTRIBUTES(e.EmpId as "id"),
    XMLELEMENT(NAME "names",
    XMLELEMENT(NAME "first", e.FirstName),
    XMLELEMENT(NAME "last", e.LastName)),
    XMLELEMENT(NAME "hire-dates",
      XMLATTRIBUTES(e.StartDate as "start", e.EndDate as "end")))
FROM Employees e WHERE e.EmpId = 12</pre>
				</slide>
				<slide>
					<title>SQL/XML:2007</title>
					<ul>
						<li>Adds the concept of <em>XML Tables</em></li>
						<li>XML Tables are not tables, they are containers for XML</li>
						<li>SQL/XML:2007 changes the database's data model</li>
						<ul>
							<li>it is now possible to have a database with <q>no tables</q></li>
							<li>likely use cases are to have both: traditional and XML tables</li>
						</ul>
						<li>SQL/XML:2007 defines a hybrid database: relational and XML database</li>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Tables and Trees don't Mix</title>
				<ul>
					<li>Tables and trees are different data models</li>
					<li>Different technologies are used to handle these different models</li>
					<li>Think before choosing the wrong tool</li>
				</ul>
			</slide>
			<slide>
				<title>Database Technologies do Mix</title>
				<ul>
					<li>Relational databases are good tools for regular data</li>
					<li>XML databases are good tools for document-oriented XML</li>
					<li>SQL/XML:2007 defines a database that does both</li>
					<li>Applications can choose the best mix of tables and trees</li>
				</ul>
			</slide>
		</part>
    </presentation>
	<presentation id="trends">
		<title short="XML Trends">XML Trends &amp; Developments</title>
		<date>2007-12-06</date>
		<toc class="resources"><a href="http://www.w3.org/XML/Activity">W3C XML Activity Statement</a></toc>
		<toc class="abstract">XML is a very basic technology for representing trees using a standardized markup-based syntax. An increasing number of technologies are building on this foundation, creating an expanding field of XML-based technologies for interoperability in many different fields. Application-specific XML-based data formats are used in many different settings, and the best data format for a given scenario depends on the existing formats in this area and the exact requirements. More interestingly, generic XML technologies which can be applied in many different settings make it easier for developers and system integrators to achieve their goal of making systems interoperate.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="webservices">
			<title>Web Services</title>
			<slide>
				<title>XML-Based Distributed Programming</title>
				<ul>
					<li>XML exchanges often have to be negotiated in advance</li>
					<ul>
						<li>the transport mechanism needs to be defined</li>
						<li>the schema(s) need to be defined</li>
						<li>the possible interactions between peers need to be defined</li>
					</ul>
					<li>Web Services are a well-defined environment for XML exchanges</li>
					<li>Two very different approaches to XML-based distributed programming</li>
					<ol>
						<li>instead of programming-language specific mechanisms, have your components talk to each other in XML</li>
						<li>instead of simply wrapping APIs in XML, redesign your IT landscape into loosely coupled systems</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Web Service Technologies</title>
				<ul>
					<li><em>Simple Object Access Protocol (SOAP)</em></li>
					<ul>
						<li>SOAP messages have an <em>envelope</em> for <q>Web Service Metadata</q></li>
						<li>SOAP messages have a <em>body</em> containing the actual payload</li>
						<li>non-XML data can be attached in the same way as for e-mail messages</li>
					</ul>
					<li><em>Web Service Description Language (WSDL)</em> for describing SOAP-based services</li>
					<ul>
						<li>the payload format must be known</li>
						<li>the different messages that may be sent must be known</li>
						<li>the transport mechanism must be known</li>
						<li>the address where to send the SOAP to must be known</li>
					</ul>
					<li><em>Universal Description, Discovery, and Integration (UDDI)</em> for making WSDL available</li>
					<ul>
						<li>UDDI is intended to be a repository for WSDL descriptions</li>
						<li>UDDI is not a global service like the DNS</li>
						<li>UDDI is modeled after <em>yellow pages</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>SOAP Example Message</title>
				<listing src="soap-example.xml"/>
			</slide>
			<slide>
				<title>WSDL Example (Google)</title>
				<listing src="GoogleSearch.wsdl" line="89-102"/>
			</slide>
			<slide>
				<title>UDDI Data Model</title>
				<img style="height : 75% ; margin : 2% ; " src="uddi-datamodel.gif"/>
			</slide>
		</part>
		<part id="xforms">
			<title>XForms</title>
			<slide>
				<title>HTML Forms Limitations</title>
				<ul>
					<li>HTML forms are very popular for data entry</li>
					<ul>
						<li>many Web-based applications use HTML forms as their interface</li>
						<li>the features offered by HTML forms are very poor</li>
					</ul>
					<li>HTML forms have a lot of limitations</li>
					<ul>
						<li>they cannot check datatypes (fields are always strings)</li>
						<li>they cannot create new fields (if data entry requires repeatable fields)</li>
						<li>they only work in HTML (integral part of the HTML language)</li>
					</ul>
					<li>Workarounds for better Web-based applications are possible</li>
					<ul>
						<li>JavaScript can be used to provide additional functionality</li>
						<li>server-side engines can provide a back-end for better forms</li>
						<li>writing accessible, portable, and usable forms is a challenge</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XForms</title>
				<ul>
					<li>XML is the most ubiquitous data format on the Web</li>
					<ul>
						<li>there is no generally supported way to edit or produce XML data</li>
						<li>forms should be XML-based rather than being based on HTTP/MIME</li>
					</ul>
					<li>XForms define an XML-based model for data editing and input</li>
					<ul>
						<li>they separate content from presentation</li>
						<li>clients are free in their choice of data presentation and acquisition</li>
						<li>XForms provide an XML-in, XML-out model of data handling</li>
						<li>XForms can be implemented server- or client-based</li>
					</ul>
					<li>Client-based XForms require <a href="http://www.mozilla.org/projects/xforms/">browser support</a></li>
					<li>Server-based XForms require XForms↔DHTML mappings</li>
				</ul>
			</slide>
			<slide>
				<title>XForms Limitations</title>
				<ul>
					<li>XForms are good for data-oriented XML</li>
					<ul>
						<li>regularly structured data</li>
						<li>no mixed content</li>
					</ul>
					<li>XForms are inappropriate for document-oriented XML</li>
					<ul>
						<li>irregularly structured data is not well-supported</li>
						<li>mixed content is not supported at all</li>
					</ul>
					<li>XForms is for forms; it is not a general XML editing facility</li>
					<ul>
						<li>XML editors often need a lot of customization</li>
						<li>there is no standards-based way for general-purpose XML editing</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="semweb">
			<title>Semantic Web</title>
			<slide>
				<title>XML is Syntax</title>
				<ul>
					<li>XML facilitates the exchange of trees</li>
					<li>XML schema languages define constraints for trees</li>
					<li>The meaning of the data encoded in the tree is unclear</li>
					<ul>
						<li>XML has no semantics (with the exception of <attr>xml:lang</attr>)</li>
						<li>semantics have to be agreed upon before cooperation is possible</li>
						<li>XML relies on other mechanisms (documentation, formal models)</li>
					</ul>
				</ul>
				<listing src="japanese2.xml"/>
			</slide>
			<slide>
				<title>Semantics</title>
				<ul>
					<li>Semantics can be defined in <em>ontologies</em></li>
					<ul>
						<li>ontologies are a formalization of a conceptualization</li>
						<li>in particular, they are based on a <em>concept for concepts</em></li>
					</ul>
					<li>By referring to ontologies, cooperation can use shared semantics</li>
					<ul>
						<li>of course, this only works if people first agree on the ontology</li>
						<li>domain specialists build ontologies, which are then used for semantics</li>
					</ul>
					<li>Semantic Web technologies revolve around the idea of ontologies</li>
					<ul>
						<li><link href="microformats"/> do not have formalized ontologies (just concepts)</li>
						<li>the <link href="rdf"/> describes resources semantically</li>
						<li>the ontology is defined using <em>RDF Schema (RDFS)</em> or the <em>Web Ontology Language (OWL)</em></li>
						<li>all kinds of AI-style applications are possible using formalized semantics</li>
					</ul>
				</ul>
			</slide>
			<part id="microformats">
				<title>Microformats</title>
				<slide>
					<title>Islands of Semantics</title>
					<ul>
						<li>Microformats solve very specific problems in a very specific way</li>
						<ul>
							<li>encoding address information on a Web page</li>
							<li>encoding a location of something represented by a Web resource</li>
						</ul>
						<li>Microformats can be compared to <q>tagging</q></li>
						<ul>
							<li>a very simple mechanism with a minimal barrier-to-entry</li>
							<li>little flexibility in adapting the mechanism to slightly different uses</li>
							<li>often underspecified, with implementation-dependent interpretation</li>
							<li>no unified rules across different platforms, which makes processing hard</li>
							<li>nice and easy to start with, but questionable for robust long-term solutions</li>
						</ul>
						<li>Currently there are about 10 reasonably popular microformats</li>
						<ul>
							<li><a href="http://microformats.org/wiki/Main_Page">calendar entries, addresses, licenses, outlines, geolocation, resumes, social networking, …</a></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Microformat Syntax</title>
					<ul>
						<li>HTML has some underspecified and underused elements</li>
						<ul>
							<li><htmel>dfn</htmel>, <htmel>code</htmel>, <htmel>samp</htmel>, <htmel>kbd</htmel>, <htmel>var</htmel>, <htmel>cite</htmel>, <htmel>abbr</htmel>, <htmel>acronym</htmel></li>
							<li>they can be reused and augmented with additional information</li>
						</ul>
						<li>HTML allows non-HTML content in HTML pages</li>
						<ul>
							<li>unknown elements and attributes must be ignored</li>
						</ul>
						<li>HTML allows <html>class</html> attributes to carry semantics</li>
						<ul>
							<li>XHTML 2 attempts to move this functionality to a <a href="http://www.w3.org/TR/xhtml-role/">role attribute</a></li>
						</ul>
						<li>HTML has a <htmel>head</htmel> which contains page metadata</li>
						<ul>
							<li>for example, the <htmel>link</htmel> element specifies connections to other resources</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="rdf">
				<title short="RDF">Resource Description Framework (RDF)</title>
				<slide>
					<title>Describing Resources</title>
					<ul>
						<li>RDF describes everything in <em>triples</em></li>
						<ul>
							<li>making a statement about a <em>resource</em> (identified by a <link href="uri">URI</link>)</li>
							<li>describing a certain <em>property</em> of the resource</li>
							<li>specifying a <em>value</em> for that property</li>
						</ul>
					</ul>
					<pre href="http://www.w3.org/TR/REC-rdf-syntax/#intro"><![CDATA[<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:contact="http://www.w3.org/2000/10/swap/pim/contact#">
  <contact:Person rdf:about="http://www.w3.org/People/EM/contact#me">
    <contact:fullName>Eric Miller</contact:fullName>
    <contact:mailbox rdf:resource="mailto:em@w3.org"/>
    <contact:personalTitle>Dr.</contact:personalTitle> 
  </contact:Person>
</rdf:RDF>]]></pre>
				</slide>
				<slide>
					<title>RDF Graphs</title>
					<img src="rdf-graph.png" style="height : 75% ; margin : 2% ; " href="http://www.w3.org/TR/REC-rdf-syntax/#intro"/>
				</slide>
				<slide>
					<title>RDF is Simple and Complex</title>
					<ul>
						<li><a href="http://www.w3.org/TR/rdf-concepts/">RDF's abstract model</a> is the idea of descriptive triples</li>
						<ul>
							<li>the actual RDF model is rooted in <em>description logic</em></li>
							<li>RDF itself can only describe individuals (something identified by URI)</li>
						</ul>
						<li><a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDF/XML</a> is an XML syntax for encoding triples</li>
						<ul>
							<li>the syntax allows a variety of ways to represent the same RDF statements</li>
							<li>processing RDF/XML with XML tools is likely to fail</li>
							<li>use RDF parsers to parse all variations of RDF/XML into an abstract RDF graph</li>
						</ul>
						<li><a href="http://www.w3.org/TR/rdf-schema/">RDF Schema</a> supports the creation of <em>RDF vocabularies</em></li>
						<ul>
							<li>describe the <em>classes of things</em> that can be used in statements</li>
							<li>describe the <em>properties</em> which can be used for each of these classes</li>
							<li>describe the <em>allowed</em> values for the supported properties</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>RDF Schema Graph</title>
					<img src="rdfs-graph.png" style="height : 75% ; margin : 2% ; " href="http://www.w3.org/TR/REC-rdf-syntax/#schemaclasses"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML is Growing</title>
				<ul>
					<li>XML is the foundation for structured information</li>
					<li>XML is getting closer to programming languages</li>
					<li>XML is becoming the standard toolset for any kind of structured information</li>
					<li>XML itself is simple, but using XML wisely is not always simple</li>
					<li>Schemas and documents may live very long, so plan ahead and choose wisely</li>
				</ul>
			</slide>
		</part>
	</presentation>
</xslidy>
