<?xml version="1.0" encoding="UTF-8"?>
<!-- $Id: xml-fall06.xml 295 2006-12-10 00:12:59Z dret $ -->
<?xslidy counter-separator=":&#160;" ?>
<?xslidy counter-format="full" ?>
<?xslidy extension-file="html" ?>
<?xslidy extension-link="" ?>
<?xslidy img-path="img" ?>
<?xslidy listing-class="listing" ?>
<?xslidy listing-path="src" ?>
<?xslidy outline-class="outline" ?>
<?xslidy outline-title="Outline" ?>
<?xslidy outlink-mark="a" ?>
<?xslidy outlink-style="class(outlink)" ?>
<?xslidy part-slide-count="all" ?>
<?xslidy part-slide-text=" [*]" ?>
<?xslidy slidy-prefix="slidy" ?>
<?xslidy style-uri="slidy-ischool/i-school.css" ?>
<?xslidy xslidy-prefix="xslidy" ?>
<xslidy xmlns="http://dret.net/xmlns/xslidy/1" xmlns:xslidy="http://dret.net/xmlns/xslidy/1">
	<title short="XML Foundations"><a href="./">XML Foundations (INFOSYS 242)</a></title>
	<author short="E. Wilde"><a href="http://dret.net/netdret/" title="dret.net">Erik Wilde</a></author>
	<affiliation short="UC Berkeley iSchool"><a href="http://www.berkeley.edu/" title="University of California, Berkeley">UC Berkeley</a> <a href="http://ischool.berkeley.edu/" title="School of Information">iSchool</a></affiliation>
	<date short="Fall 2006">Fall Semester 2006</date>
	<copyright>2006 Erik Wilde</copyright>
	<!--
		Layout definitions. The single <slide> below is a cover-slide template tagged
		cover="slidycover"; the presentation further down in this file refers to it
		with the same cover="slidycover" attribute.
		NOTE(review): the empty <title/>, <author/>, <affiliation/> and <date/>
		elements are presumably placeholders that get filled from the metadata
		elements at the top of this document (confirm against the XSLidy docs).
		The template also carries a linked Creative Commons BY-NC-SA 2.5 license
		badge. The trailing <class> element is currently empty.
	-->
	<layout>
		<slide class="cover" cover="slidycover">
			<h1><title/></h1>
			<h3><title level="xslidy"/></h3>
			<h5><author/>, <affiliation/><br/><date/></h5>
			<a rel="license" title="view full text of license" href="http://creativecommons.org/licenses/by-nc-sa/2.5/" class="bottom-align" style="margin-bottom : 2%">
				<table>
					<tr>
						<td align="left">
							<img alt="Creative Commons License" border="0" src="somerights20.png" height="31" width="88"/>
						</td>
						<td style="font-size : small ; line-height : 120%;" valign="middle" align="left">
							<p>This work is licensed under a Creative Commons<br/>Attribution-NonCommercial-ShareAlike 2.5 License.</p>
						</td>
					</tr>
				</table>
			</a>
		</slide>
		<class>
        </class>
	</layout>
	<style type="text/css" src="xslidy-fall06.css"/>
	<!--
		Index configuration, associated with the file name index.html.
		Each <category> pairs an element name with a class value (two
		space-separated classes where given, e.g. "xml elem"). Several element
		names share one class: cssp, csss and css all map to "css".
		NOTE(review): presumably inline elements with these names are collected
		into the generated index and styled by the given class; confirm against
		the XSLidy documentation before reordering or renaming entries.
	-->
	<index name="index.html">
		<category element="xml" class="xml"/>
		<category element="elem" class="xml elem"/>
		<category element="cssp" class="css"/>
		<category element="csss" class="css"/>
		<category element="css" class="css"/>
		<category element="xpathf" class="xpath"/>
		<category element="xpath" class="xpath"/>
		<category element="xslte" class="xslt elem"/>
		<category element="xslta" class="xslt"/>
		<category element="xslt" class="xslt"/>
		<category element="xsde" class="xsd elem"/>
		<category element="xsda" class="xsd"/>
		<category element="xsd" class="xsd"/>
	</index>
	<!--
		Table-of-contents template "html-toc", associated with the file name toc.html.
		It consists of an HTML table with a fixed header row and, wrapped in
		<for-each-presentation>, one row template with five cells: date, title plus
		abstract, link to the slides, required reading, and resources.
		The <toc id="..."/> references inside the row use the same ids as the
		<toc id="..."> entries defined inside each presentation (e.g. "abstract"
		and "resources").
		NOTE(review): the "*" in the <slides> cell is presumably replaced by the
		slide count, analogous to the part-slide-text setting at the top of the
		file; confirm against the XSLidy documentation.
	-->
	<toc id="html-toc" name="toc.html">
		<table rules="all" cellspacing="0" cellpadding="5" width="100%">
			<thead>
				<tr>
					<th>Date</th>
					<th>Subject</th>
					<th>Slides</th>
					<th>Required Reading</th>
					<th>Resources</th>
				</tr>
			</thead>
			<tbody>
				<for-each-presentation>
					<tr>
						<td align="right" valign="top"><date/></td>
						<td><b><title/><span class="toggle">:</span></b> <span class="toggle"><span class="abstract"><toc id="abstract"/></span></span></td>
						<td align="center"><presentation-link title="Lecture Slides"><title form="short"/></presentation-link> <slides>(*&#160;Slides)</slides></td>
						<td><toc id="reading"/></td>
						<td><toc id="resources"/></td>
					</tr>
				</for-each-presentation>
			</tbody>
		</table>
	</toc>
	<toc id="sylvia" name="242.xml">
		<course xmlns="urn:publicid:IDN+www.sims.berkeley.edu:schema:syllabusapp:syllabus:200404:en" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:publicid:IDN+www.sims.berkeley.edu:schema:syllabusapp:syllabus:200404:en syllabus_schema.xsd">
			<generalInformation>
				<title>XML Foundations</title>
				<units>2</units>
				<website>http://dret.net/lectures/xml-fall06/</website>
				<departmentListing>
					<name>SIMS</name>
					<code>INFOSYS</code>
					<courseNumber>242</courseNumber>
				</departmentListing>
				<schedule>
					<year>2006</year>
					<semester>F</semester>
					<startDate>2006-08-29</startDate>
					<endDate>2006-10-19</endDate>
				</schedule>
				<teachingTeam>
					<teacher>
						<typeCode>Professor</typeCode>
						<initials>EW</initials>
						<name>
							<givenName>Erik</givenName>
							<familyName>Wilde</familyName>
						</name>
						<contact>
							<email>dret@sims.berkeley.edu</email>
							<phone>
								<type>Office</type>
								<number>+1-510-6432253</number>
							</phone>
							<website>http://dret.net/netdret/</website>
						</contact>
						<officeHours>
							<dayPattern>
								<dayTime>
									<dayOfWeek>Tu</dayOfWeek>
									<timeSpan>
										<startTime>15:30:00</startTime>
										<endTime>16:30:00</endTime>
									</timeSpan>
								</dayTime>
							</dayPattern>
							<dayPattern>
								<dayTime>
									<dayOfWeek>Th</dayOfWeek>
									<timeSpan>
										<startTime>15:30:00</startTime>
										<endTime>16:30:00</endTime>
									</timeSpan>
								</dayTime>
							</dayPattern>
							<location>314 South Hall</location>
						</officeHours>
					</teacher>
					<teacher>
						<typeCode>TA</typeCode>
						<initials>KL</initials>
						<name>
							<givenName>Katrina</givenName>
							<familyName>Rhoads Lindholm</familyName>
						</name>
						<contact>
							<email>krhoads@sims.berkeley.edu</email>
							<website>http://ischool.berkeley.edu/~krhoads/</website>
						</contact>
						<officeHours>
							<dayPattern>
								<dayTime>
									<dayOfWeek>M</dayOfWeek>
									<timeSpan>
										<startTime>12:30:00</startTime>
										<endTime>14:00:00</endTime>
									</timeSpan>
								</dayTime>
							</dayPattern>
							<location>210 South Hall</location>
						</officeHours>
					</teacher>
				</teachingTeam>
				<gradingOptionCode>PNP</gradingOptionCode>
				<textList>
					<textbook>
						<code>Ray</code>
						<title>Learning XML, 2nd Edition</title>
						<author>
							<givenName>Erik</givenName>
							<middleName>T.</middleName>
							<familyName>Ray</familyName>
						</author>
						<publisher>O'Reilly</publisher>
						<publishDate>September 2003</publishDate>
						<isbn>0-596-00420-6</isbn>
						<requirement>Required</requirement>
					</textbook>
				</textList>
				<description>
					<p>Three hours of lecture, one hour of Laboratory per week. The Extensible Markup Language (XML), with its ability to define formal structural and semantic definitions for metadata and information models, is the key enabling technology for information services and document-centric business models that use the Internet and its family of protocols. This course introduces XML syntax, styles and transformations, and schema languages. It balances conceptual topics with practical skills for designing and implementing conceptual models as XML schemas.</p>
				</description>
			</generalInformation>
			<syllabus>
				<instructionFormatCode>LEC</instructionFormatCode>
				<dayPattern>
					<dayTime>
						<dayOfWeek>Tu</dayOfWeek>
						<timeSpan>
							<startTime>14:00:00</startTime>
							<endTime>15:30:00</endTime>
						</timeSpan>
					</dayTime>
					<dayTime>
						<dayOfWeek>Th</dayOfWeek>
						<timeSpan>
							<startTime>14:00:00</startTime>
							<endTime>15:30:00</endTime>
						</timeSpan>
					</dayTime>
				</dayPattern>
				<location>110 South Hall</location>
				<classes>
					<xslidy:for-each-presentation>
						<class>
							<title><xslidy:title/></title>
							<date><xslidy:date form="short"/></date>
							<xslidy:if-toc id="abstract"><description><xslidy:toc id="abstract"/></description></xslidy:if-toc>
							<xslidy:if-toc id="reading"><readingList><reading><requirement>Required</requirement><comment><xslidy:toc id="reading"/></comment></reading></readingList></xslidy:if-toc>
							<resourceList>
								<resource>
									<title>Lecture Notes</title>
									<url><xslidy:presentation-link element="" prefix="http://dret.net/lectures/xml-fall06/"/></url>
								</resource>
								<xslidy:if-toc id="resources"><resource><comment><xslidy:toc id="resources"/></comment></resource></xslidy:if-toc>
							</resourceList>
						</class>
					</xslidy:for-each-presentation>
				</classes>
<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
<assignments xmlns="urn:publicid:IDN+www.sims.berkeley.edu:schema:syllabusapp:syllabus:200404:en"><assignment><title>Assignment 1: Getting Started with XML and XML Editors</title><status>Ungraded</status><assignedDate>2006-09-05</assignedDate><dueDate>2006-09-12</dueDate><description>This assignment introduces you to XML in the context of XML Spy or the oXygen XML Editor. You don't have to turn anything in.  However, you should use this opportunity to get comfortable with one of the two editors, as you will be using them for the rest of the semester, and maybe the rest of your life.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/1</descriptionURL><resource><title>Sample Files</title><url>http://www.dret.net/lectures/xml-fall06/a/1/a1.zip</url></resource></assignment><assignment><title>Assignment 2: Résumé XML and DTD</title><status>Ungraded</status><assignedDate>2006-09-07</assignedDate><dueDate>2006-09-14</dueDate><description>In this assignment, you will take a sample résumé and create an XML representation of it.  You will also create a DTD that can be used to validate your XML document, and other résumés that are structurally similar to yours.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/2</descriptionURL><answers><a href="http://www.dret.net/lectures/xml-fall06/a/2/answers/a2_Tor_Landheim.xml">Tor Landheim's XML</a><br/><a href="http://www.dret.net/lectures/xml-fall06/a/2/answers/a2_Tor_Landheim.dtd">Tor Landheim's DTD</a></answers><resource><title>Sample Résumé</title><url>http://www.dret.net/lectures/xml-fall06/a/2/SampleResume.pdf</url></resource></assignment><assignment><title>Assignment 3: CSS</title><status>Ungraded</status><assignedDate>2006-09-14</assignedDate><dueDate>2006-09-19</dueDate><description>Create a <em xmlns="http://www.w3.org/1999/xhtml">Cascading Style Sheet (CSS)</em> for a simple HTML document. 
The HTML contains simple structural markup and some additional classes which should be used for creating formatting specific to these contents.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/3</descriptionURL><answers><a href="http://www.dret.net/lectures/xml-fall06/a/3/answers/">Directory of Submitted CSS's</a><br/><a href="http://www.dret.net/lectures/xml-fall06/a/3/answers/slideshow.html">CSS Slideshow</a></answers><resource><title>HTML Document</title><url>http://www.dret.net/lectures/xml-fall06/a/3/SampleHTML.html</url></resource><resource><title>CSS Zen Garden</title><url>http://www.csszengarden.com/</url></resource><resource><title>WDG HTML Reference</title><url>http://www.htmlhelp.org/reference/html40/</url></resource><resource><title>w3schools.com CSS Reference</title><url>http://www.w3schools.com/css/</url></resource></assignment><assignment><title>Assignment 4: XPath and Namespaces</title><status>Ungraded</status><assignedDate>2006-09-21</assignedDate><dueDate>2006-09-26</dueDate><description>Answer a set of questions about XPath and Namespaces.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/4</descriptionURL><answers><a href="http://www.dret.net/lectures/xml-fall06/a/4/answers/">Solutions</a></answers><resource><title>XML Document to use for XPath Evaluations</title><url>http://www.dret.net/lectures/xml-fall06/a/4/dret.xml</url></resource><resource><title>Pretty (and Complete) HTML Version of the above XML Document</title><url>http://dret.net/biblio/</url></resource><resource><title>w3schools.com XPath Reference</title><url>http://www.w3schools.com/xpath/</url></resource></assignment><assignment><title>Assignment 5: XML to HTML Transformation</title><status>Ungraded</status><assignedDate>2006-09-26</assignedDate><dueDate>2006-10-03</dueDate><description>Create an XML file with your personal resume information and then transform it into HTML using 
XSLT.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/5</descriptionURL><resource><title>w3schools.com XPath Reference</title><url>http://www.w3schools.com/xpath/</url></resource><resource><title>w3schools.com XSLT Reference</title><url>http://www.w3schools.com/xsl</url></resource></assignment><assignment><title>Assignment 6: DTD to Schema</title><status>Ungraded</status><assignedDate>2006-10-05</assignedDate><dueDate>2006-10-12</dueDate><description>Convert your résumé DTD to an XML Schema. In addition to the simple way of moving from DTD syntax to XML Schema syntax, we also require you to improve the schema, so that it is a better schema than the DTD (because it is more selective in what it validates).</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/6</descriptionURL><resource><title>w3schools.com XML Schema Reference</title><url>http://www.w3schools.com/schema/</url></resource></assignment><assignment><title>Assignment 7: XML to XML Transformation with CSS on generated HTML</title><status>Graded</status><assignedDate>2006-10-17</assignedDate><dueDate>2006-10-26</dueDate><description>In this final assignment, you will have the opportunity to utilize many of the XML skills you have learned throughout the course.  These include XML, XML Schema, XPath, XSLT and CSS.</description><descriptionURL>http://dret.net/lectures/xml-fall06/a/7</descriptionURL><resource><title>Resume Schema</title><url>http://www.dret.net/lectures/xml-fall06/a/7/resume.xsd</url></resource><resource><title>XSL for converting to HTML</title><url>http://www.dret.net/lectures/xml-fall06/a/7/resume.xsl</url></resource><resource><title>w3schools.com XML Schema Reference</title><url>http://www.w3schools.com/schema/</url></resource><resource><title>w3schools.com XPath Reference</title><url>http://www.w3schools.com/xpath/</url></resource><resource><title>w3schools.com XSLT Reference</title><url>http://www.w3schools.com/xsl</url></resource></assignment></assignments>
<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
			</syllabus>
			<updated>
				<updateDate>2006-07-18</updateDate>
				<updateBy>dret</updateBy>
			</updated>
		</course>
	</toc>
	<presentation id="intro" cover="slidycover">
		<title short="Introduction">Overview and Introduction</title>
		<date short="2006-08-29">Tuesday, August 29, 2006</date>
		<toc id="resources"><a href="http://www.w3.org/Press/1998/XML10-REC">XML 1.0 Press Release</a></toc>
		<toc id="abstract">The <em>Extensible Markup Language (XML)</em> was introduced in 1998 to enable content providers to publish their content on the Web in an application-specific format. HTML was considered to convey too little semantics, since its only purpose was (and is) the preparation of content for Web-based publishing. XML was the first step towards machine-readable data formats for the Web, a trend that since its invention has been taken to higher levels with the idea of the <em>Semantic Web</em>. XML appeared when the Web was in the steepest part of its success curve, and since then has taken over as the globally accepted format for the exchange of machine-readable structured data.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<part>
			<title>Varia</title>
			<slide>
				<title>About Me</title>
				<ul>
					<li>Apprenticeship at <a href="http://www.hmi.de/index_en.html">Hahn-Meitner-Institut Berlin (HMI)</a> (85-88)</li>
					<li>Computer Science at <a href="http://www.tu-berlin.de/eng/">Technical University of Berlin (TUB)</a> (88-91)</li>
					<ul>
						<li>working on DAPHNE, an SGML-based document preparation system</li>
					</ul>
					<li>Ph.D. at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> (92-97)</li>
					<ul>
						<li>thesis on <q><a href="http://dret.net/netdret/publications#wil97b">Group and Session Management for Collaborative Applications</a></q></li>
					</ul>
					<li>Post-Doc at <a href="http://www.icsi.berkeley.edu/">ICSI, Berkeley</a> (97/98)</li>
					<ul>
						<li>book on <q><a href="http://dret.net/netdret/publications#wil98">Technical Foundations of the World Wide Web</a></q>
						</li>
					</ul>
					<li>Various activities back in Switzerland (98-06)</li>
					<ul>
						<li>teaching at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> and <a href="http://www.fhnw.ch/">FHNW</a></li>
						<li>working as independent consultant (training, courses, consulting)</li>
						<li>research in <a href="http://dret.net/projects/">various XML-related areas</a></li>
						<li>starting and leading the <a href="http://dret.net/projects/sharef/">ShaRef project</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About You</title>
				<img style="margin : 4% ; width : 90% ; " src="classroom.jpg"/>
			</slide>
			<slide>
				<title>About this Course</title>
				<ul>
					<li>Course Web page: <code><a href="./">http://dret.net/lectures/xml-fall06/</a></code></li>
					<li>Course mailing list: subscribe at <code><a href="mailto:majordomo@sims.berkeley.edu">majordomo@sims.berkeley.edu</a></code></li>
					<ul>
						<li>no subject (leave blank)</li>
						<li>body of message: <code>subscribe i242</code></li>
					</ul>
					<li>Grading is offered pass/fail only</li>
					<li>Lab times have to be negotiated today or Thursday</li>
					<ul>
						<li>Tuesday 11:00-12:30</li>
						<li>Wednesday 12:30-14:00</li>
						<li>Wednesday 16:00-17:30</li>
						<li>Thursday 11:00-12:30</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About these Slides</title>
					<ul>
						<li>Generated from <a href="http://dret.net/projects/xslidy/">XSLidy</a> <a href="./xml-fall06.xml">XML</a></li>
						<ul>
							<li>all <a href="http://www.w3.org/Talks/Tools/Slidy/">Slidy</a> presentations are generated from this source</li>
							<li><code><a href="./242.xml">242.xml</a></code> for importing the syllabus into <a href="http://rosetta.sims.berkeley.edu:8085/sylvia/f06/view/242.complete">SylViA</a></li>
							<li><code><a href="./toc.html">toc.html</a></code> for displaying the summary on the <a href="./">course's Web page</a></li>
						</ul>
						<li>Designed for online presentation and use (lots of links!)</li>
						<ul>
							<li>for printing, use <q>a</q> (all slides), and <q>s</q> (smaller font) a couple of times</li>
						</ul>
						<li>A good real-world example for XML applications</li>
						<ul>
							<li>XSLidy is useful, but there is no interface (XML editing only)</li>
							<li>SylViA is useful, but there is no interface (XML editing or XSLidy export)</li>
							<li>SylViA is over-modeled in some areas and too monolithic</li>
							<li>UCB-wide management of course material and syllabi would be great</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>Additional Resources</title>
				<ul>
					<li>My <a href="http://dret.net/glossary/">Online Glossary at <code>http://dret.net/glossary/</code></a></li>
						<ul>
							<li>suggestions, updates, corrections are very welcome</li>
							<li>another exercise in how to use XML and XSLT for information management</li>
						</ul>
					<li>My <a href="http://dret.net/biblio/">bibliography at <code>http://dret.net/biblio/</code></a></li>
						<ul>
							<li>suggestions, updates, corrections are very welcome</li>
							<li>produced by an <link href="sharef">XML-centric system for managing bibliography data</link></li>
						</ul>
					<li>The <a href="http://www.w3.org/"><em>World Wide Web Consortium (W3C)</em></a></li>
					<ul>
						<li>the organization which invented XML</li>
						<li>as well as (almost) all other technologies covered in this course</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Why XML?</title>
			<slide>
				<title>Web Technologies</title>
				<ul>
					<li>Early Web: URI+HTTP+HTML</li>
					<ul>
						<li>URIs identify resources (in a human-readable way)</li>
						<li>HTTP retrieves resources (using a simple protocol)</li>
						<li>HTML is the resource format (using a simple data format)</li>
					</ul>
					<li>The early Web was a distributed hypermedia system</li>
					<ul>
						<li>not designed by hypermedia researchers or companies</li>
						<li>simple enough to be adopted very fast</li>
					</ul>
					<li>The Web today uses many different technologies</li>
					<ul>
						<li>URI+HTTP+HTML for basic Web publishing</li>
						<li>CSS &amp; JavaScript (maybe even AJAX) for advanced publishing</li>
					</ul>
					<li>JavaScript &amp; XML (a.k.a. AJAX)</li>
					<ul>
						<li>scripts dynamically loading data from a server</li>
						<li>machine-to-machine interaction: the server and the script</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>From Humans to Machines</title>
				<ul>
					<li>The Web was designed for humans</li>
					<ul>
						<li>HTML is a language for describing page layout and links</li>
						<li>machines were only used for implementing it</li>
					</ul>
					<li>Search engines were the first machine users on the Web</li>
					<ul>
						<li>they made the Web's success possible</li>
						<li>they demonstrated how hard it is to <q>understand</q> HTML pages</li>
						<li>search engines are still a very active field of research</li>
					</ul>
					<li>A bigger Web needs more automation</li>
				</ul>
			</slide>
			<part>
				<title>Pre-XML Problems</title>
				<slide>
					<title>HTML is for Humans</title>
					<ul>
						<li>HTML is a format for <q>dead ends</q></li>
						<ul>
							<li>HTML is good for rendering Web pages</li>
							<li>HTML is bad for understanding Web pages</li>
							<li>the browser is a <q>dead end</q> (from a machine's point of view)</li>
						</ul>
						<li>Web growth in the late 90's was enormous</li>
						<ul>
							<li>everybody was putting information <q>online</q></li>
							<li>but this information was inaccessible for machines</li>
						</ul>
						<li>How can this information be made accessible to machines?</li>
						<ul>
							<li>HTML is not the right format (slightly better than fax machines)</li>
							<li>there was no other widely accepted format for structured data</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>A Machine-Friendly Web</title>
					<ul>
						<li>Information should be published in a machine-understandable format</li>
						<ul>
							<li>HTML is good for rendering Web pages</li>
							<li>HTML is bad for understanding Web pages</li>
							<li><q>understanding</q> is the key term here: <u>application</u> semantics!</li>
						</ul>
						<li>Information should be published in application-specific formats</li>
						<ul>
							<li>HTML is one application: Rendering documents for humans</li>
							<li>machines need other structures to process Web content</li>
						</ul>
						<li>1996: W3C Working Group <q>SGML on the Web</q></li>
						<ul>
							<li>HTML is just one document type defined with SGML</li>
							<li>SGML is a very complex and expensive technology</li>
							<li>how can SGML be made easily and widely usable?</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XML on the Web</title>
				<slide>
					<title>SGML, HTML, and XML</title>
					<ul>
						<li>Standard Generalized Markup Language (SGML)</li>
						<ul>
							<li>a language for designing <em>document types</em></li>
							<li>a very complex standard with many expensive and non-interoperable implementations</li>
						</ul>
						<li>Hypertext Markup Language (HTML)</li>
						<ul>
							<li>implements <a href="http://www.w3.org/TR/REC-html40/sgml/loosedtd.html">a simple SGML <em>document type</em></a></li>
							<li>its syntax is <a href="http://www.oasis-open.org/cover/sgmlsyn/sgmlsyn.htm">SGML syntax</a>, it is not defined by HTML itself</li>
							<li>uses very few SGML features, dedicated processors are rather easy to build</li>
						</ul>
						<li>Extensible Markup Language (XML)</li>
						<ul>
							<li>a language for designing <em>document types</em> (i.e., classes of documents)</li>
							<li>a greatly simplified version of SGML, omitting many obscure features</li>
							<li>a specification with <u>no optional parts!</u></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Documents on the Web</title>
					<ul>
						<li>XML's idea was that content should be published as XML</li>
						<ul>
							<li>stylesheets could then be used to render human-readable views</li>
							<li>machines could simply use the underlying XML</li>
						</ul>
						<li>There are (almost) no XML documents on the Web</li>
						<ul>
							<li>stylesheet support depends on browsers (software has a long life!)</li>
							<li>many content providers do not want to publish machine-readable data</li>
						</ul>
						<li>There are many XML documents behind HTML documents</li>
						<ul>
							<li>content does not have to be made public in a machine-readable way</li>
							<li>browser-independent HTML can be produced from XML</li>
							<li>XML technologies can be leveraged on the server-side</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Documents Elsewhere</title>
					<ul>
						<li>XML is not used as intended, but it is very successful</li>
						<ul>
							<li>as a server-side foundation for Web publishing</li>
							<li>as a B2B-focused format with no Web publishing in mind</li>
						</ul>
						<li>XML has been successful for several reasons</li>
						<ul>
							<li>being there at the right time (Internet bubble)</li>
							<li>politically correct (the W3C is OS-agnostic)</li>
							<li>technically sound (simple and no optional parts)</li>
							<li>human-readable based on a well-known syntax</li>
							<li>great for rapid prototyping and experiments</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XML Today</title>
				<slide>
					<title>Used Everywhere</title>
					<ul>
						<li>Very small: Messages from sensors</li>
						<ul>
							<li>e.g., building automation or car electronics</li>
							<li>mostly implemented in hardware or firmware</li>
						</ul>
						<li>Very large: Genome sequences</li>
						<ul>
							<li>encoding the results of genome analyses</li>
							<li>yields very large XML documents (several gigabytes)</li>
						</ul>
						<li>Very different processing requirements</li>
						<ul>
							<li>very fast processing (time critical applications)</li>
							<li>memory-conserving processing (very large documents)</li>
							<li>incremental processing (streaming)</li>
							<li>random access (only small part required)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>This Course and XML</title>
					<ul>
						<li><q>XML is the ASCII for the 21<sup>st</sup> century</q></li>
						<ul>
							<li>information professionals should know and use XML</li>
							<li>you will see it in many projects</li>
							<li>you will hopefully use it in many projects</li>
							<li>you will be able to build and test prototypes very rapidly</li>
						</ul>
						<li>What do you need for using XML?</li>
						<ul>
							<li>XML and some kind of schema language</li>
							<li>XSLT for processing it</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>What is XML?</title>
			<slide>
				<title>XML Yin &amp; Yang</title>
				<ul>
					<li>XML is:</li>
					<ul>
						<li>great for exchanging trees (if this is what you want to do)</li>
						<li>platform-independent (even your mobile phone processes XML)</li>
						<li>a foundation for other technologies (some of which we will look at)</li>
					</ul>
				</ul>
				<ul>
					<li>XML is not:</li>
					<ul>
						<li>a programming language (ever programmed comma-separated values?)</li>
						<li>capturing semantics (without higher-layer consensus, XML is worthless)</li>
						<li>ensuring interoperability (we both use bits! we can interoperate!)</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>What is XML Good for?</title>
				<slide>
					<title>Why Use XML?</title>
					<ul>
						<li>Because you want to share data</li>
						<ul>
							<li>share it in a format which is widely used and easy to use</li>
							<li>enable others to use it on various platforms with existing tools</li>
						</ul>
						<li>Because you want to share data cheaply</li>
						<ul>
							<li>it is easier to use XML than to invent something new</li>
							<li>it is even easier to use an existing XML schema than to invent a new one</li>
						</ul>
						<li>Because you want to share data openly</li>
						<ul>
							<li>if you invent new formats, people must process them</li>
							<li>avoid applying the <q>security through obscurity</q> principle inadvertently</li>
							<li>application-specific processing should be deferred to higher layers</li>
						</ul>
					</ul>
				</slide>
				<slide id="sharef">
					<title>Case Study</title>
					<ul>
						<li>Managing bibliographic data in universities is a problem</li>
						<ul>
							<li>very different cultures (law vs. computer science)</li>
							<li>different tool sets (programs, operating systems, habits)</li>
							<li>many potential uses (yearly reports, CV, departmental web site)</li>
						</ul>
						<li><a href="http://dret.net/projects/sharef/">ShaRef (Shared References)</a> is trying to solve this problem</li>
						<ul>
							<li>XML-based and open data model</li>
							<li>lossless import and export for important data formats</li>
							<li>easy integration into existing IT landscapes</li>
						</ul>
						<li>XML helps in processing and exchanging data</li>
						<ul>
							<li>XML data can be processed with many tools and technologies</li>
							<li>XML can be used on many different platforms</li>
							<li>for XML to be used, a well-defined data model is necessary</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Pre-XML Data</title>
					<ul>
						<li>Handcrafted syntax is hard to understand and process</li>
						<li>Parsing requires a specialized parser</li>
						<li>Character set issues can become very complicated</li>
					</ul>
					<listing src="242.bib" line="1-10"/>
				</slide>
				<slide>
					<title>XMLized Data (Bad Idea)</title>
					<listing src="242.badbib.xml" line="1-12"/>
				</slide>
				<slide>
					<title>XMLized Data</title>
					<ul>
						<li>Structurally identical to the original BibTeX document</li>
						<li>Can be processed with XML tools and technologies</li>
						<li>Character set issues still unsolved (needs <a href="http://dret.net/bibconvert/tex2unicode">mapping table</a>)</li>
					</ul>
					<listing src="242.bib.xml" line="3-34"/>
				</slide>
				<slide>
					<title>XML Data</title>
					<ul>
						<li>Well-defined and well-documented data model</li>
						<li>Reusable in different contexts</li>
						<li>Still some open issues (e.g., no concept of author identity)</li>
					</ul>
					<listing src="242.xml" line="3-35"/>
				</slide>
				<slide>
					<title>Other XML Data</title>
					<ul>
						<li>Data using other schemas can be easily derived</li>
						<li>Both formats must be <q>understood</q> to make the mapping</li>
						<li>Some things may be missing (e.g., <em>translated titles</em>)</li>
					</ul>
					<listing src="242.endnote.xml" line="4-43"/>
				</slide>
				<slide>
					<title>Is XML Self-Describing?</title>
					<ul>
						<li>XML is often said to be <q>self-describing</q></li>
						<ul>
							<li>many people think this is the same as <q>self-explanatory</q></li>
							<li>the catch is what exactly it is you refer to by <q>describing</q></li>
						</ul>
						<li>Database data cannot live without a database</li>
						<ul>
							<li>database data is simply content, the structure is provided by a DBMS</li>
							<li>XML documents have their structure encoded within them</li>
							<li>compared to database data, XML in fact is <q>self-describing</q></li>
						</ul>
						<li>What is the gap between <q>self-describing</q> and <q>self-explanatory</q>?</li>
						<ul>
							<li>it is impossible to find out how the document could be modified</li>
							<li>there are no semantics associated with either structure or content</li>
							<li>so <q>self-describing</q> means you can guess a lot, but you may be wrong</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>What is XML not Good for?</title>
				<slide>
					<title>XML is Character-Based</title>
					<ul>
						<li>XML is <u>not</u> a binary format, it is <link href="unicode">based on Unicode</link></li>
						<ul>
							<li>binary formats cannot (or rather should not) be described using XML</li>
						</ul>
						<li>Multimedia formats often are binary</li>
						<ul>
							<li>image formats such as GIF, JPEG, and PNG</li>
							<li>audio formats such as MP3 and AAC</li>
							<li>video formats such as MPEG4 and H.264</li>
						</ul>
						<li>But: Multimedia also uses many XML formats</li>
						<ul>
							<li>vector graphics formats such as <em>Scalable Vector Graphics (SVG)</em></li>
							<li><em>Synchronized Multimedia Integration Language (SMIL)</em> for describing presentations</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML is a Syntax for Trees</title>
					<ul>
						<li>Not all data is easily represented by trees</li>
						<ul>
							<li>overlapping markup (multiple <q>views</q> of the same content)</li>
							<li>graph-like structures which are less constrained than trees</li>
						</ul>
						<li>What is it that you have in your tree?</li>
						<ul>
							<li>XML encodes a structure purely on the syntactic level</li>
							<li>what the structures <u>mean</u> is in no way described by XML</li>
							<li>XML structures must be accompanied by semantic descriptions</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Usages</title>
					<ul>
						<li>XML can be used <link href="bestpractices">in different ways</link></li>
						<ul>
							<li>people should be able to use your XML directly using standard tools</li>
							<li>if they <em>absolutely need</em> a set of special tools, something is wrong</li>
						</ul>
						<li>XML is hip, so everybody wants to use it</li>
						<ul>
							<li>many things have been created ad-hoc and without much planning</li>
							<li>if you start something which is XML-based, use XML responsibly</li>
							<li>if you have to use some <q>bad XML</q>, complain about it</li>
						</ul>
						<li>Finding the balance can be hard</li>
						<ul>
							<li>XML is great for prototyping and experiments</li>
							<li>once you decide to redesign your XML, it may be too late</li>
							<li><em>XML documents</em> may be short-lived, <em>XML schemas</em> are definitely not</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Beyond XML</title>
			<slide>
				 <title>Sharing Concepts</title>
				 <ul>
					<li>XML is a syntax for trees</li>
					 <ul>
						<li>trees are just data</li>
						<li>for doing something useful, you must <em>understand the tree's contents</em></li>
					 </ul>
					<li>Schema-based sharing of concepts is possible</li>
					 <ul>
						<li>HTML works great because everybody is using it</li>
						<li>anything beyond HTML's capabilities needs a new schema</li>
					 </ul>
					<li>General sharing of concepts is hard</li>
					 <ul>
						<li>the AI community tried for decades and failed</li>
						<li>micro-formats are a more humble approach to <q>reusable shared concepts</q></li>
					 </ul>
				 </ul>
			</slide>
			<slide id="intro-semweb">
				<title>The Semantic Web</title>
				<ul>
					<li>Technologies for describing concepts</li>
					<ul>
						<li>the foundation of successful interaction is <em>mutual understanding</em></li>
						<li>describe your XML using Semantic Web technologies</li>
					</ul>
					<li>XML core technologies do not convey any meaning</li>
					<ul>
						<li>XML is a language for exchanging trees</li>
						<li>XML schema languages describe what trees may be exchanged</li>
						<li>XML schema languages are for <em>markup design</em></li>
					</ul>
					<li>Semantic Web technologies have received a lot of attention</li>
					<ul>
						<li>and a lot of research funding</li>
						<li>success for the most general approaches is highly questionable</li>
						<li>proven failure as demonstrated by <a href="http://technetcast.ddj.com/tnc_play_stream.html?stream_id=526">AI's failure</a></li>
						<li>modest approaches are much more promising and likely to succeed</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="intro-conclusions">
			<title>Conclusions</title>
			<slide>
				<title>What's the Plan?</title>
				<ul>
					<li><link href="basics">XML Basics</link> and <link href="bestpractices">how to apply them</link></li>
					<li><link href="dtd">Describing classes of XML documents</link></li>
					<li><link href="css">How to control the presentation of XML documents</link></li>
					<li><link href="xmlns">Combining different vocabularies of XML documents</link></li>
					<li><link href="xpath">Selecting parts of an XML document</link></li>
					<li><link href="xslt1">Transforming XML into something else (or XML again)</link></li>
					<li><link href="xsd1">A more complicated way to describe classes of XML documents</link></li>
					<li><link href="schemalanguages">Even more ways of describing classes of XML documents</link></li>
					<li><link href="xdbms">How does all of this relate to databases?</link></li>
					<li><link href="trends">What to expect as future developments</link></li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="basics" cover="slidycover">
		<title short="Basics">XML Basics</title>
		<date short="2006-08-31">Thursday, August 31, 2006</date>
		<toc id="reading">Chapters 1.3 (pp. 16-28) &amp; 2.1-2.4 (pp. 49-66)</toc>
		<toc id="resources"><a href="http://www.w3.org/TR/REC-xml/">W3C's XML Specification</a></toc>
		<toc id="abstract">The <em>Extensible Markup Language (XML)</em> defines a simple way for structuring data. The power and popularity of XML can be explained by its versatility, the platform-independence, the standards and technologies leveraging it, and the number of tools and products supporting it. Understanding XML itself is rather simple, it only depends on a very small set of other technologies. Unicode and URIs are the most important foundations of XML. XML itself specifies two different things: on the one hand the format for structured data, which are called <em>XML documents</em>, and on the other hand a constraint language for XML documents, which is called <em>Document Type Definition (DTD)</em>.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>Reminders</title>
			<ul>
				<li>Attendance is mandatory</li>
				<li>Course mailing list: subscribe at <code><a href="mailto:majordomo@sims.berkeley.edu">majordomo@sims.berkeley.edu</a></code></li>
				<ul>
					<li>no subject (leave blank)</li>
					<li>body of message: <code>subscribe i242</code></li>
				</ul>
				<li>Lab time needs to be renegotiated</li>
				<ul>
					<li>Monday 11.00-15.00 (earlier is better)</li>
					<li>Tuesday 9.30-12.30 (later is better)</li>
					<li>Wednesday 9.30-12.30 (later is better)</li>
					<li>Thursday 11.00-12.30</li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Foundations for XML</title>
			<slide>
				<title>Identifications</title>
				<ul>
					<li>Identification of Character Encodings</li>
					<ul>
						<li>text can be encoded using different character sets and encodings</li>
						<li>IANA maintains the <a href="http://www.iana.org/assignments/character-sets">official list of character encodings</a></li>
					</ul>
					<li>Identification of Languages</li>
					<ul>
						<li>textual content should be tagged with language information</li>
						<li>specification based on <a href="http://www.loc.gov/standards/iso639-2/langhome.html">ISO 639 language tags</a></li>
					</ul>
				</ul>
			</slide>
			<part id="unicode">
				<title>Unicode</title>
				<slide>
					<title>XML's Idea of Content and Names</title>
					<p>XML documents can use a wide array of characters. They are defined by <a href="http://www.unicode.org/">Unicode</a>, which currently (Version 5.0) defines more than 100'000 characters (#100'000 added in 2005).</p>
					<listing src="japanese1.xml"/>
					<listing src="japanese2.xml"/>
				</slide>
				<slide>
					<title>XML and Unicode</title>
					<ul>
						<li>XML is based on Unicode</li>
						<ul>
							<li>XML is defined in terms of <a href="http://www.w3.org/TR/xml/#sec-starttags">character structures</a></li>
							<li>how these characters are encoded is not part of XML</li>
						</ul>
						<li>How are XML documents encoded?</li>
						<ul>
							<li>applications can use any character encoding they like</li>
							<li>XML processors <u>must</u> support UTF-8 and UTF-16</li>
						</ul>
						<li>How is the encoding <q>encoded</q>?</li>
						<ul>
							<li>part of the XML document: <code>&lt;?xml version="1.0" encoding="UTF-8"?></code></li>
							<li>bootstrap problem solved heuristically or by out-of-band information</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="uri">
				<title>Uniform Resource Identifier (URI)</title>
				<slide>
					<title>Identifiers are Essential</title>
					<ul>
						<li><em>Uniform Resource Locator (URL)</em> is the old concept</li>
						<ul>
							<li>introduced to distinguish between <em>locating</em> and <em>naming</em></li>
							<li><em>locating</em> and <em>naming</em> are two ways of <em>identification</em></li>
							<li>URLs have been replaced by URIs, technically URLs do not exist anymore</li>
						</ul>
						<li>URIs identify resources</li>
						<ul>
							<li>some resources may be retrieved using a protocol: <code>http://dret.net/netdret/</code></li>
							<li>not all resource access is retrieval: <code>mailto:dret@ischool.berkeley.edu</code></li>
							<li>sometimes computers are not required: <code>tel:+1-510-6432253</code></li>
							<li>or resources cannot be located: <code>urn:ietf:rfc:2648</code></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>XML</title>
			<slide>
				<title>XML Use Cases</title>
				<ul>
					<li>XML is a metalanguage supporting application-specific vocabularies</li>
					<li><em>RSS</em> (and <em>Atom</em>) are XML vocabularies for newsfeeds</li>
					<ul>
						<li><a href="http://docordie.blogspot.com/">Doc or Die</a>: <a href="http://docordie.blogspot.com/rss.xml">RSS feed</a> vs. <a href="http://docordie.blogspot.com/atom.xml">Atom feed</a></li>
						<li>browsers now incorporate newsfeed readers</li>
					</ul>
					<li><em>OpenDocument (ODF)</em> is a language for office application documents</li>
					<ul>
						<li>designed for open and interoperable exchange</li>
						<li>standardized by ISO (which now also standardizes Microsoft's <em>Open XML</em>)</li>
					</ul>
					<li><em>Scalable Vector Graphics (SVG)</em> for portable vector graphics</li>
					<ul>
						<li>designed for embedding in Web pages</li>
						<li>good example for compound documents: <a href="http://www.carto.net/papers/svg/animated_weather_symbols/">HTML containing SVG</a></li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>XML Documents</title>
				<slide>
					<title>Markup?</title>
					<ul>
						<li>Structures are encoded using special characters</li>
						<ul>
							<li>a fundamental difference when comparing to binary formats</li>
							<li>markup languages can be read and modified using text-based tools</li>
							<li>programs must treat markup characters in a special way</li>
						</ul>
						<li>Documents are content interspersed with markup (i.e., structures)</li>
						<ul>
							<li>XML-aware software interprets the markup</li>
							<li>XML-unaware software just sees a text file</li>
						</ul>
						<li>You have to pay the <link href="markup-price"/></li>
					</ul>
				</slide>
				<slide>
					<title>Basic Concepts</title>
					<ul>
						<li>XML Documents have an <em>XML declaration</em> (optional)</li>
						<li>There is exactly one <em>document element</em> (a.k.a. <em>root element</em>)</li>
						<li>Elements may be nested (there is no conceptual limit)</li>
						<ul>
							<li>elements may be repeated (they can be identified by position)</li>
						</ul>
						<li>Elements are marked up using <em>tags</em></li>
						<ul>
							<li>most elements have content, surrounded by <em>start</em> and <em>end tags</em></li>
							<li>empty elements are allowed and may use a special notation</li>
						</ul>
						<li>Elements may have attributes (zero to any number)</li>
						<ul>
							<li>attributes can only occur once on an element (i.e., they cannot be repeated)</li>
						</ul>
					</ul>
					<listing src="my-first.xml"/>
				</slide>
				<slide id="xmltree">
					<title>Tree Syntax</title>
					<ul>
						<li>Markup is important, but only a notation</li>
						<li>XML documents are trees with different node types</li>
						<ul>
							<li>nodes so far: document, element, attribute, text</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree.png" title="XML document tree"/>
					</ul>
				</slide>
				<slide id="xmlelements">
					<title>Elements</title>
					<ul>
						<li>Elements can use a <a href="http://www.w3.org/TR/xml/#NT-Name">wide variety of names</a></li>
						<ul>
							<li>Allowed: <elem>html</elem>, <elem>_</elem>, <elem>:</elem>, <elem>id9832798472</elem>, <elem>こんにちは</elem></li>
							<li>Disallowed: leading numbers, spaces, control characters</li>
						</ul>
						<li>Element names usually convey some information about the content</li>
						<ul>
							<li>this is not reliable and highly language-dependent</li>
							<li>it is <em>very useful</em> when working with a known vocabulary</li>
							<li>it is <em>potentially harmful</em> when working with an unknown vocabulary</li>
						</ul>
						<li>Elements are the foundation for XML's versatility</li>
						<ul>
							<li>they can be nested (<code>&lt;address>&lt;city>Berkeley&lt;/city>&lt;zip>94709&lt;/zip>...</code>)</li>
							<li>they can be repeated (<code>&lt;givenname>Erik&lt;/givenname>&lt;givenname>Thomas&lt;/givenname></code>)</li>
							<li>their sequence can convey additional information (given names have a sequence)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Attributes</title>
					<ul>
						<li>Additional information pertaining to elements</li>
						<li>Traditionally, anything that is not considered <q>content</q></li>
						<ul>
							<li>SGML is a document markup language</li>
							<li>XML uses SGML's concepts</li>
							<li>XML has its roots in the document world</li>
						</ul>
						<li>Elements: Content (i.e., Data); Attributes: Metadata</li>
						<li>Documents often distinguish by what is textual content</li>
					</ul>
					<listing src="section.xml" line="12-20"/>
				</slide>
				<slide>
					<title>Attribute Syntax</title>
					<ul>
						<li>Naming rules are the same as for <link href="xmlelements"/></li>
						<li>Attributes always appear within an element's <em>start tag</em></li>
						<li>Attributes are <a href="http://www.w3.org/TR/xml/#NT-Attribute">name/value-pairs</a></li>
						<ul>
							<li>the value is enclosed in single or double quotes</li>
						</ul>
						<li>Attribute with a single-quote value: <elem>elem attr="Single: '"/</elem></li>
						<li>Attribute with a double-quote value: <elem>elem attr='Double :"'/</elem></li>
						<li>How can attribute values contain both?</li>
					</ul>
				</slide>
				<slide id="markup-price">
					<title>The Price for Markup</title>
					<ul>
						<li>Markup characters have a special meaning</li>
						<ul>
							<li><q>&lt;</q> opens a tag</li>
							<li>within attribute values, quotes delimit the value</li>
						</ul>
						<li>The literal use of a markup character requires escaping</li>
						<ul>
							<li>XML's <em>entities</em> can refer to pieces of content</li>
							<li>entity syntax is <code>&amp;name;</code> for referring to entity <code>name</code></li>
							<li>XML has 5 <a href="http://www.w3.org/TR/xml/#sec-predefined-ent">predefined entities</a>: <code>&amp;lt;</code>, <code>&amp;gt;</code>, <code>&amp;amp;</code>, <code>&amp;apos;</code>, <code>&amp;quot;</code></li>
						</ul>
						<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>
					</ul>
					<pre><![CDATA[<li>Attribute using both kinds of quotes: <code>&lt;elem attr="Single ' and Double &amp;quot;"/></code></li>]]></pre>
				</slide>
				<slide id="mixed-content">
					<title>Mixed Content</title>
					<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smily.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>
					<pre><![CDATA[<p>The term <em>Mixed content</em> in XML refers to elements <a href="http://www.w3.org/TR/xml/#sec-mixed-content">which have text content mixed with elements</a>. What these elements do depends on the elements <img style="height : 1em" src="smily.gif"/>, but the important point is that they are on the same level as the text nodes of the mixed content.</p>]]></pre>
				<img style="width : 90% ; margin : 4% ;" src="mixed-content.png" title="XML tree for mixed content"/>
				</slide>
				<slide>
					<title>Mixed Content Usage</title>
					<ul>
						<li>Database people find mixed content irritating</li>
						<ul>
							<li>cannot be easily mapped to relational structures</li>
							<li>is more <em>document-like</em> than <em>data-like</em></li>
						</ul>
						<li>Document people find mixed content very intriguing</li>
						<ul>
							<li>textual content can still be used as simple text</li>
							<li>markup provides additional information for rich text</li>
						</ul>
					</ul>
				</slide>
				<slide id="whitespace">
					<title>Whitespace</title>
					<ul>
						<li>XML documents often are pretty-printed</li>
						<li><em>Whitespace text nodes</em> often are <q>not really content</q></li>
						<ul>
							<li>XML whitespace characters are <em>space</em>, <em>tab</em>, <em>newline</em>, and <em>carriage return</em></li>
							<li>whitespace text nodes are text nodes containing <em>only</em> whitespace characters</li>
						</ul>
						<img style="width : 90% ; margin : 4% ;" src="document-tree-whitespace.png" title="XML tree with whitespace text nodes"/>
					</ul>
				</slide>
				<slide>
					<title>Significant Whitespace</title>
					<ul>
						<li>Some whitespace text nodes are relevant</li>
						<li>Usually text nodes in <em>mixed content</em> elements</li>
					</ul>
					<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>
					<pre><![CDATA[<p>Whitespace <i>can be</i> <u>very</u> <b>important</b>!</p>]]></pre>
					<img style="height : 40% ; margin : 2% ;" src="significant-whitespace.png" title="XML tree containing significant whitespace"/>
				</slide>
			</part>
			<part id="wellformed">
				<title>Processing XML</title>
				<slide>
					<title>Observing XML Syntax</title>
					<ul>
						<li>XML's syntax requires you to use the right characters</li>
						<ul>
							<li><a href="http://www.w3.org/TR/xml/#NT-element">the grammar alone</a> allows many XML errors</li>
							<li><a href="http://www.w3.org/TR/xml/#GIMatch">additional constraints</a> ensure that everything is used correctly</li>
						</ul>
						<li><em>XML processors</em> (a.k.a. <em>XML parsers</em>) check for these rules</li>
						<ul>
							<li>if there are problems, the document cannot be interpreted as XML</li>
							<li>otherwise, the document is said to be <em>well-formed</em></li>
						</ul>
						<li>Only well-formed documents can be regarded as a tree</li>
						<ul>
							<li>other documents are not XML at all, even though they may be close</li>
							<li>XML processors must report problems to the application (no <em>silent recovery</em>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Validity</title>
					<ul>
						<li><em>Well-formed documents</em> observe XML rules</li>
						<ul>
							<li>they observe XML syntax</li>
							<li>they observe all well-formedness constraints</li>
						</ul>
						<li>Applications require the right elements and attributes</li>
						<li><em>Validity</em> is a more comprehensive concept</li>
						<li><em>Valid documents</em> observe additional rules</li>
						<ul>
							<li>they must be well-formed documents</li>
							<li>they must adhere to the constraints defined in a <link href="dtd"/></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Semantics</title>
					<ul>
						<li>XML is a language for encoding trees</li>
						<ul>
							<li>elements and attributes are labeled nodes in this tree</li>
							<li>the labels can be chosen freely by document authors</li>
						</ul>
						<li>The tree's meaning is nothing XML is concerned with</li>
						<ul>
							<li>peers must have a mutual understanding of the semantics</li>
							<li>XML without mutual understanding is almost useless</li>
							<li>reverse engineering often is possible, but it is risky and brittle</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML Documents</title>
				<ul>
					<li>XML documents are structured data using markup</li>
					<li>Elements and Attributes are the main structuring mechanisms</li>
					<li>Elements and Attributes are names, but have no inherent semantics</li>
					<li>For using XML successfully, <em>shared semantics</em> are essential</li>
				</ul>
			</slide>
			<slide>
				<title>XML Document Classes</title>
				<ul>
					<li>XML <link href="dtd"/> define classes of documents</li>
					<li>Elements and Attributes and their usage can be defined</li>
					<li>By validating documents, their structural correctness can be checked</li>
					<li>DTDs solve a small part of checking XML for <em>semantic integrity</em></li>
				</ul>
			</slide>
		</part>		
	</presentation>
	<presentation id="dtd" cover="slidycover">
		<title short="DTD">Document Type Definition (DTD)</title>
		<date short="2006-09-05">Tuesday, September 5, 2006</date>
		<toc id="reading">Chapter 4-4.2 (pp. 108-132)</toc>
		<toc id="resources"><a href="xml-quickref.pdf">XML QuickRef</a></toc>
		<toc id="abstract">The XML specification defines a format for structured data (XML documents) and a grammar-based constraint language for these (DTD). In SGML-based systems, DTDs were often very complex and feature-rich constructs, which controlled a lot of the processing of SGML documents. XML greatly simplified DTDs, and de-facto usage of DTDs today simplified them even more. In many systems today, DTDs are not used at all or generated from sample documents. In this lecture, it is argued that DTDs (or schemas, to be more general) should be taken seriously in any non-trivial XML application, because they are a representation of the underlying (and often underspecified) data model of the application.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<part>
			<title>Schema Languages</title>
			<slide>
				<title>XML Validation</title>
				<ul>
					<li>XML knows two <q>states</q> of documents, <em>well-formed</em> and <em>valid</em></li>
					<li><em>well-formed</em> documents satisfy all basic constraints of the XML specification</li>
					<ul>
						<li>they can be parsed according to the XML grammar</li>
						<li>they satisfy the additional constraints (e.g., start and end tags match)</li>
						<li>together, this means they can be translated into a <link href="xmltree">tree</link></li>
					</ul>
					<li><em>valid</em> documents have been validated against a DTD</li>
					<ul>
						<li>a document must be well-formed before it can be validated</li>
						<li>all elements and attributes have been defined</li>
						<li>elements and attributes are used according to their definition</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Validation and Applications</title>
				<img src="valid-documents.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
			<slide>
				<title>Non-XML, Well-Formed, and Valid</title>
				<listing src="non.xml" line="3-9"/>
				<listing src="invalid.xml" line="3-9"/>
				<listing src="valid.xml" line="3-9"/>
			</slide>
			<slide>
				<title>DTD Example</title>
				<listing src="valid.xml" line="1-2"/>
				<listing src="address.dtd"/>
				<ul>
					<li>The DTD defines constraints on element and attribute usage</li>
					<li>The DTD only partially constrains textual content</li>
				</ul>
			</slide>
			<slide>
				<title>XML Schema Languages</title>
				<ul>
					<li>DTDs are part of XML itself</li>
					<ul>
						<li>XML specifies the document format <u>and</u> one schema language</li>
						<li>DTD support is provided by most XML processors (<a href="http://www.w3.org/TR/REC-xml/#proc-types" title="XML specification">validating processors</a>)</li>
					</ul>
					<li>Other schema languages are available</li>
					<ul>
						<li><link href="xsd1">XML Schema</link> as the W3C's recommendation</li>
						<li><link href="schematron"/> as a rule-based alternative</li>
						<li>various <a href="http://dret.net/glossary/xmlschemalanguage" title="XML glossary">other research projects and products</a></li>
					</ul>
					<li>Choosing appropriate schema language(s) is important</li>
					<ul>
						<li>we look at DTDs because they are part of XML itself</li>
						<li>we look at XML Schema because it is widely used</li>
						<li>we look at Schematron because it is simple and powerful</li>
						<li>you may even invent your own schema language (a.k.a. <em>meta-programming</em>)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>DTD Basics</title>
			<slide>
				<title>XML is SGML light</title>
				<ul>
					<li>XML is a subset of SGML</li>
					<ul>
						<li>XML documents have been greatly simplified</li>
						<li>XML DTDs have retained more of SGML's peculiarities</li>
					</ul>
					<li>DTD design should be left to XML experts</li>
					<ul>
						<li>simple DTDs (for prototypes) are easy to define (or generate)</li>
						<li>serious DTDs for complex data models are hard to define</li>
					</ul>
					<li>XML is a useful tool for experiments and prototypes</li>
					<ul>
						<li>basic knowledge of DTDs is required</li>
						<li>serious XML schemas often use <link href="xsd1">XML Schema</link> anyway</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Connecting Documents and DTDs</title>
				<ul>
					<li>A DTD is a schema for a set of documents</li>
					<ul>
						<li>there may be just one document for a DTD, there may be billions (HTML)</li>
						<li>in most cases, DTDs are managed as a separate resource</li>
					</ul>
					<li>The <a href="http://www.w3.org/TR/xml#sec-prolog-dtd"><em>Document Type Declaration</em></a> <q>contains or points to markup declarations that provide a grammar for a class of documents</q></li>
					<ul>
						<li>the part which is contained is called <em>Internal Subset</em></li>
						<li>the part which is pointed to is called <em>External Subset</em></li>
						<li>internal and external subset together are the <em>Document Type Definition (DTD)</em></li>
					</ul>
					<li>External subsets are identified by <em>Public</em> and <em>System Identifiers</em></li>
					<ul>
						<li><em>public identifiers</em> use a special notation</li>
						<li><em>system identifiers</em> are URIs (relative or absolute)</li>
						<li>applications use (i.e., know or retrieve) the DTD for validation</li>
					</ul>
				</ul>
				<listing src="valid.xml" line="1-2"/>
			</slide>
			<part>
				<title>DTD Syntax</title>
				<slide>
					<title>DTDs are not XML Documents</title>
					<ul>
						<li>DTDs use a special syntax</li>
						<ul>
							<li>somewhat ironic when everything else is XMLized</li>
							<li>DTDs cannot be processed with standard XML tools</li>
							<li>more compact than XML syntax</li>
						</ul>
						<li>Definition of elements and attribute lists</li>
						<ul>
							<li>elements are defined by the content they allow</li>
							<li>attribute lists are sets of allowed attributes on elements</li>
						</ul>
					</ul>
					<listing src="address.dtd"/>
				</slide>
				<slide>
					<title>Syntax Rules</title>
					<ul>
						<li>There is no container containing the whole DTD</li>
						<ul>
							<li><code>&lt;!ELEMENT xml EMPTY></code> thus is a complete DTD</li>
						</ul>
						<li>Definitions (officially called <em>declarations</em>) use <code>&lt;!... ></code> syntax</li>
						<ul>
							<li><code>ELEMENT</code> is used to <link href="dtd-element">define an element</link></li>
							<li><code>ATTLIST</code> is used to <link href="dtd-attlist">define an attribute list</link></li>
							<li><code>ENTITY</code> is used to <link href="dtd-entity">define an entity</link></li>
						</ul>
						<li>The document element is not marked explicitly</li>
						<ul>
							<li>but it must be declared in the document type declaration</li>
							<li>this means the document element is defined by the document, not by the DTD</li>
						</ul>
					</ul>
				<listing src="valid.xml" line="1-3"/>
				</slide>
			</part>
			<part id="dtd-element">
				<title>Defining Elements</title>
				<slide id="element-only-declaration">
					<title>Element Only Content</title>
					<ul>
						<li>Element content is defined by a grammar for the children</li>
						<ul>
							<li>sequences are indicated with a comma: <q><code>,</code></q></li>
							<li>choices are indicated with a vertical bar: <q><code>|</code></q></li>
							<li>optional parts are indicated with a question mark: <q><code>?</code></q></li>
							<li>repeatable parts are indicated with a plus: <q><code>+</code></q></li>
							<li>optional and repeatable parts are indicated with an asterisk: <q><code>*</code></q></li>
							<li>parentheses can be used for grouping and nesting</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="1064-1074"/>
				</slide>
				<slide id="mixed-content-declaration">
					<title>Mixed Content</title>
					<ul>
						<li><link href="mixed-content"/> allows text content and elements to be mixed</li>
						<ul>
							<li><link href="whitespace"/> characters are allowed in <link href="element-only-declaration"/> (this need not be declared)</li>
							<li>for non-whitespace characters, character data must be allowed explicitly</li>
						</ul>
						<li>The allowed child elements may be constrained, but not their order or their number of occurrences</li>
						<li>Mixed Content always is defined as <code>&lt;!ELEMENT x (#PCDATA | a | b | ...)* ></code></li>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="568-568"/>
					<ul>
						<li><em>character only</em> content is a special case of mixed content</li>
						<ul>
							<li>the element may only contain characters (no other elements)</li>
							<li>the repetition is not necessary because there is no choice</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="355-355"/>
				</slide>
				<slide>
					<title>Empty Content</title>
					<ul>
						<li>Empty elements can be useful</li>
						<ul>
							<li>they may contain all information in attributes</li>
							<li>their presence may carry semantics without the need for additional information</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="833-848"/>
				</slide>
			</part>
			<part id="dtd-attlist">
				<title>Defining Attribute Lists</title>
				<slide>
					<title>Attributes belong to Elements</title>
					<ul>
						<li>Attributes are specified in an element's <em>Attribute List</em></li>
						<ul>
							<li>an element definition may have any number of attributes associated with it</li>
							<li>attributes may occur at most once on an element</li>
						</ul>
						<li>Attribute definitions have a <em>name</em>, a <em>type</em>, and a <em>default declaration</em></li>
						<ul>
							<li>the attribute appears according to the default declaration</li>
							<li>if the attribute is present, its value must conform to the type</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="794-801"/>
				</slide>
				<slide id="dtd-attr-type">
					<title>Attribute Types</title>
					<ul>
						<li>Attribute values can be constrained (which is not possible for element content)</li>
						<ul>
							<li><code>CDATA</code> means any character string (but no markup)</li>
							<li>enumerated types list allowed values: <code>(data|ref|object)</code> (list of XML names)</li>
							<li><code>ID</code> for identifying elements (part of <code><link href="ididref"/></code>)</li>
							<li><code>IDREF</code> for referencing identified elements (part of <code><link href="ididref"/></code>)</li>
						</ul>
						<li>Application-oriented attribute types are often <q>simulated</q></li>
						<ul>
							<li>using <link href="param-entity"/>, modeling information can be preserved</li>
						</ul>
					</ul>
					<listing src="xhtml1-transitional.dtd" line="894-894"/>
					<listing src="xhtml1-transitional.dtd" line="52-53"/>
					<ul>
						<li>The default declaration specifies the attribute's presence</li>
						<ul>
							<li><code>#REQUIRED</code> means the attribute has to be specified (on every element)</li>
							<li><code>#IMPLIED</code> marks an optional attribute (the parser may imply a value)</li>
							<li><code>"..."</code> specifies a default value (and the attribute is optional)</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Advanced DTDs</title>
			<part id="ididref">
				<title>ID/IDREF</title>
				<slide>
					<title>References in Documents</title>
					<ul>
						<li>Without Validation, there are no IDs</li>
						<ul>
							<li><code>ID</code> is an <link href="dtd-attr-type">attribute type</link> declared in the DTD</li>
							<li><code>xml:id</code> is an attempt to support schema-independent IDs</li>
						</ul>
						<li>IDs are used to assign identities to elements</li>
						<ul>
							<li>the XML processor reports duplicate IDs as errors (<a href="http://www.w3.org/TR/xml/#id">part of validation</a>)</li>
						</ul>
						<li>IDREFs are used to reference existing IDs</li>
						<ul>
							<li>the XML processor reports references to non-existing IDs as errors (<a href="http://www.w3.org/TR/xml/#idref">part of validation</a>)</li>
						</ul>
						<li>IDs must be XML Names (in particular, they may not start with a number)</li>
					</ul>
				</slide>
				<slide>
					<title>ID/IDREF in a Document</title>
					<listing src="section.xml" line="3-18"/>
					<listing src="section.dtd" line="2-12"/>
				</slide>
				<slide>
					<title>References within the Tree</title>
					<img src="section.png" style="width : 90% ; margin : 4% ; "/>
				</slide>
				<slide>
					<title>Formatting Example</title>
					<p>XSLidy can generate links to sections such as the section about <link href="ididref"/>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>
					<pre><![CDATA[<p>XSLidy can generate links to sections such as the section about <link href="ididref"/>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>]]></pre>
					<p>After running XSLidy, the following HTML is generated:</p>
					<pre><![CDATA[<p>XSLidy can generate links to sections such as the section about <a href="#(23)">ID/IDREF</a>, this link is then translated into the appropriate HTML code, meaning a link with the target being a fragment identifier to the slide number.</p>]]></pre>
				</slide>
				<slide>
					<title>ID/IDREF Semantics</title>
					<ul>
						<li>Rooted in the document world</li>
						<ul>
							<li>all parts are assembled before processing</li>
							<li>names are symbolic and assigned as required</li>
							<li>mixed syntax and semantics</li>
						</ul>
						<li>Good idea, but many shortcomings</li>
						<ul>
							<li>constraints apply to one document only</li>
							<li>IDs and IDREFs are global instead of scoped</li>
							<li>identifiers should be allowed to use any type</li>
							<li>identifier processing should be type-specific (2 &#x225F; 02)</li>
						</ul>
						<li>Applications must know how to process ID/IDREF</li>
						<ul>
							<li>for HTML export, links can be generated</li>
							<li>for databases, keys should be used</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="dtd-entity">
				<title>Entities</title>
				<slide>
					<title>General Entities</title>
					<ul>
						<li>XML's core concept of physical data structures</li>
						<ul>
							<li>an entity is a named unit of data which can be referenced</li>
							<li>within documents, it is referenced by the markup <code>&amp;entity-name;</code></li>
						</ul>
						<li>Entities can be used to name and reuse document content</li>
					</ul>
					<listing src="xhtml-lat1.ent" line="135-142"/>
					<ul>
						<li><em>Character References</em> look like entities: <code>&amp;#9786;</code> or <code>&amp;#x263A;</code> = &#x263A;</li>
						<ul>
							<li>they can be used to represent any Unicode character, they are processed as single characters</li>
						</ul>
					</ul>
				</slide>
				<slide id="param-entity">
					<title>Parameter Entities</title>
					<ul>
						<li>Parameter entities are parsed entities for use within the DTD</li>
						<ul>
							<li>a parameter entity must be specifically declared as such</li>
							<li>within DTDs, it is referenced by the markup <code>%entity-name;</code></li>
							<li>outside of DTDs, parameter entities cannot be used</li>
						</ul>
						<li>Like general entities, parameter entities are meant for reuse</li>
						<ul>
							<li>in a DTD, reuse is mostly about reusing structures</li>
							<li>parameter entities are DTDs' <q>duct tape</q>: not elegant, but effective</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XHTML Parameter Entities (Attributes)</title>
					<listing src="xhtml1-transitional.dtd" line="433-437"/>
					<listing src="xhtml1-transitional.dtd" line="188-188"/>
					<listing src="xhtml1-transitional.dtd" line="133-138"/>
					<listing src="xhtml1-transitional.dtd" line="145-149"/>
					<listing src="xhtml1-transitional.dtd" line="55-56"/>
					<listing src="xhtml1-transitional.dtd" line="193-193"/>
				</slide>
				<slide>
					<title>XHTML Parameter Entities (Content)</title>
					<listing src="xhtml1-transitional.dtd" line="433-437"/>
					<listing src="xhtml1-transitional.dtd" line="230-230"/>
					<listing src="xhtml1-transitional.dtd" line="227-227"/>
					<listing src="xhtml1-transitional.dtd" line="203-204"/>
					<listing src="xhtml1-transitional.dtd" line="200-201"/>
					<listing src="xhtml1-transitional.dtd" line="197-198"/>
					<listing src="xhtml1-transitional.dtd" line="222-222"/>
				</slide>
			</part>
		</part>
		<part>
			<title>More Advanced DTDs</title>
			<slide>
				<title>Additional Mechanisms</title>
				<ul>
					<li>DTDs have more advanced mechanisms</li>
					<ul>
						<li>used in few applications, mostly by SGML veterans</li>
						<li>should not be used in new projects</li>
					</ul>
					<li><em>Conditional Sections</em> for configurable DTDs</li>
					<ul>
						<li>parts of a DTD can be enclosed in special constructs</li>
						<li>based on parameter entity setting, these parts can be switched <q>on</q> or <q>off</q></li>
					</ul>
					<li><em>External Entities</em> for referencing external resources</li>
					<ul>
						<li><em>parsed entities</em> contain content parsed by the XML processor</li>
						<li>inclusion should be done with <em>XInclude</em></li>
						<li><em>unparsed entities</em> contain non-XML content (e.g., images or plain text)</li>
						<li>referring to non-XML content is handled on the application level</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>DTD for XML Schemas</title>
				<ul>
					<li>XML documents are processed by applications</li>
					<li>Applications have assumptions about XML documents</li>
					<li>DTDs allow some of these constraints to be formalized</li>
					<li>Part of the constraint checking must still be programmed</li>
				</ul>
			</slide>
			<slide>
				<title>Modeling DTDs</title>
				<ul>
					<li>Data models can be mapped to many different DTDs</li>
					<li>What is a good DTD? What is a bad DTD?</li>
					<li>How does the DTD affect further processing?</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="bestpractices" cover="slidycover">
		<title short="Best Practices">The Good, the Bad, and the Ugly</title>
		<date short="2006-09-07">Thursday, September 7, 2006</date>
		<toc id="reading"><a href="http://dret.net/netdret/docs/wilde-elpub2006-xml.pdf#page=18">Chapter 3-3.4 (pp. 18-25)</a></toc>
		<toc id="resources"><q><a href="http://www.tbray.org/ongoing/When/200x/2006/01/09/On-XML-Language-Design">On XML Language Design</a></q></toc>
		<toc id="abstract">While XML is rather easy to understand and use, it is also rather easy to use XML in ways which either produce <q>ugly</q> XML, or which may lead to problems in components further processing the XML. The topic of this lecture thus is to look at design guidelines for XML schemas, leading to <q>good</q> XML. Some of the simpler topics cover basic questions of how to map a data model to XML markup (e.g., when to use elements or attributes). The next question is how data should be represented in XML so that applications can process it efficiently. We also look at what part of the markup an application will actually have access to, and this is defined by the <em>XML Information Set (Infoset)</em>, the specification underlying many XML technologies.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XML Best Practices</title>
			<ul>
				<li><link href="goodxml">Good</link>: What you should do when using XML</li>
				<li><link href="badxml">Bad</link>: What you should not do when using XML</li>
				<li><link href="uglyxml">Ugly<sup>1</sup></link>: What you maybe have to do when using XML</li>
				<li><link href="infoset">Ugly<sup>2</sup></link>: XML's ugly little secret...</li>
			</ul>
		</slide>
		<part id="goodxml">
			<title short="Good XML">XML Best Practices</title>
			<slide>
				<title>Markup and Schemas</title>
				<ul>
					<li>XML can be encountered in different ways</li>
					<ol>
						<li>as somebody having to process XML documents</li>
						<li>as somebody having to understand XML documents</li>
						<li>as somebody having to generate XML documents</li>
						<li>as somebody having to design XML schemas</li>
					</ol>
				</ul>
			</slide>
			<part id="good-documents">
				<title>XML Documents</title>
				<slide>
					<title>Generating XML</title>
					<ul>
						<li>Character encoding</li>
						<ul>
							<li>use one of XML's standard encodings (UTF-8 or UTF-16)</li>
							<li>if you are using mostly Latin characters, UTF-8 is much more compact</li>
							<li>any other character encoding may cause interoperability issues</li>
						</ul>
						<li>Pretty-printing (adding line feeds and indentation)</li>
						<ul>
							<li>pretty-printed XML is easier to read for humans</li>
							<li>pretty-printed XML contains unnecessary whitespace</li>
							<li>pretty-printing is good for experiments and prototypes</li>
							<li>pretty-printing should be switched off for production systems</li>
						</ul>
					</ul>
				</slide>
				<slide id="xml-views">
					<title>XML Views</title>
					<ul>
						<li>Other people may use different tools</li>
						<ul>
							<li>XML is a character-based format, so every character counts</li>
							<li>other people may choose different technologies</li>
							<li>even your XML editor may choose to see things differently</li>
						</ul>
						<li>Many XML technologies use abstractions</li>
						<ul>
							<li>useful for concentrating on the <em>tree view</em></li>
							<li>no full control of markup usage (automatic serialization)</li>
							<li>think about working with a tree rather than working with a text file</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="good-dtd">
				<title>XML DTDs</title>
				<slide id="model-to-markup">
					<title>From Model to Markup</title>
					<ul>
						<li>There should be a conceptual model of the data</li>
						<ul>
							<li>formal conceptual models for XML are an active field of research</li>
							<li>informal models may use any notation</li>
						</ul>
						<li>Model design should omit questions of markup design</li>
						<ul>
							<li>element/attribute decisions are not a model question</li>
							<li>hierarchy/reference decisions are not a model question</li>
							<li>identifying the relevant entities and their relationships is a good idea</li>
						</ul>
						<li>Document engineering never invented modeling tools</li>
						<ul>
							<li>for document modelers, <q>the markup is the model</q></li>
							<li>there are no established notations for modeling documents</li>
							<li>document-type parts (e.g., mixed content) are hard to include in models</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>From Graphs to Trees</title>
					<ul>
						<li>In the model, <em>n:m</em> relationships may appear</li>
						<ul>
							<li>in an address database, an address should be reusable</li>
							<li>in a résumé, an organization's information should be reusable</li>
						</ul>
						<li>XML documents are trees</li>
						<ul>
							<li>all non-tree structures must be represented by tree structures</li>
							<li>in most cases, this will be done by introducing references</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>From Markup to Model</title>
					<ul>
						<li>Start with a sample instance</li>
						<ol>
							<li>start with a sample instance</li>
							<li>generate a schema for the instance with some tool</li>
							<li>open up the schema where necessary</li>
							<li>try creating more example instances <em>as different as possible/required</em></li>
							<li>write code for manipulating your test set of instances</li>
						</ol>
						<li>Restarting may be hard, but should be done</li>
						<ul>
							<li>view the initial design as a test bed, not as the <q>first version</q></li>
							<li>after you have learned some lessons, <em>throw everything away</em></li>
							<li>restart by designing everything from scratch</li>
							<li>content may be salvaged by writing small XSLT programs</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Top-Down or Bottom-Up?</title>
					<ul>
						<li>Both strategies have strengths and shortcomings</li>
						<ul>
							<li><em>top-down</em> tends to result in markup which looks <q>generated</q></li>
							<li><em>bottom-up</em> tends to result in markup which is less consistent</li>
						</ul>
						<li>Consistency is an important consideration</li>
						<ul>
							<li>if you dislike attributes, avoid them wherever possible</li>
							<li>if you like attributes, use them wherever possible</li>
							<li>don't mix these two styles of markup design</li>
						</ul>
					</ul>
				</slide>
				<slide id="reuse">
					<title>Reuse is Good</title>
					<ul>
						<li>Elements can be reused in different contexts</li>
						<ul>
							<li>elements then appear in the content model of more than one element</li>
							<li>an <code>address</code> may be used for <code>employee</code> as well as for <code>customer</code></li>
						</ul>
						<li>Content can be reused in different contexts</li>
						<ul>
							<li>(parts of) a content model may be useful in different contexts</li>
							<li>this only reuses an element's content, but not its name</li>
						</ul>
						<li>Attributes can be reused in different contexts</li>
						<ul>
							<li>technically, attributes are element-specific and have no relations when appearing on different elements</li>
							<li>when reusing attribute names, they should represent the same concept</li>
						</ul>
					</ul>
					<listing src="reuse.xml" line="3-16"/>
				</slide>
				<slide>
					<title>Reuse is Hard (in DTDs)</title>
					<ul>
						<li>Element reuse simply lists the element in more than one content model</li>
						<li>Content reuse requires parameter entities</li>
						<li>Attribute reuse requires parameter entities</li>
						<li>Nested parameter entities for multi-level reuse</li>
					</ul>
					<listing src="reuse.dtd"/>
				</slide>
			</part>
			<part>
				<title>General XML Issues</title>
				<slide id="element-vs-attribute">
					<title>Element vs. Attribute</title>
					<ul>
						<li>Elements and attributes are containers</li>
						<ul>
							<li>both contain character content</li>
						</ul>
						<li>Elements may carry attributes and may contain other elements</li>
						<ul>
							<li>for nested structures, elements must be chosen</li>
							<li>if the content needs to be annotated with an attribute, an element must be chosen</li>
							<li>if the item should be repeatable, an element must be chosen</li>
						</ul>
						<li>Attributes use less markup and have types</li>
						<ul>
							<li>if the content is (unstructured) <q>metadata</q>, an attribute may be a good choice</li>
							<li>for special types (ID/IDREF and enumerations), attributes are required</li>
							<li>if simple markup is an issue, attributes may be preferable</li>
						</ul>
						<li>Be consistent in your markup design style!</li>
					</ul>
				</slide>
				<slide>
					<title>Hierarchy vs. Reference</title>
					<ul>
						<li>Hierarchies are only possible with <em>1:n</em> relationships</li>
						<ul>
							<li>for <em>n:m</em> relationships, references are the only possible representation</li>
						</ul>
						<li>Containment should be represented as hierarchy</li>
						<ul>
							<li>containment limits the lifetime of the contained part to that of the container</li>
						</ul>
					</ul>
					<listing src="address-hierarchy.xml" line="2-11"/>
					<listing src="address-reference.xml" line="2-11"/>
				</slide>
				<slide id="granularity">
					<title>Granularity</title>
					<ul>
						<li>XML structures should identify the relevant information</li>
						<ul>
							<li>what exactly does <q>relevant</q> mean?</li>
							<li>very high granularity makes data acquisition hard</li>
							<li>very high granularity makes data processing easy</li>
						</ul>
						<li>Granularity is a general problem of data modeling</li>
						<ul>
							<li>XML is simply a syntax for representing structured data</li>
							<li><code>&lt;phone>+1-510-6432253&lt;/phone></code></li>
							<li><code>&lt;phone cc="1" area="510" local="6432253"/></code></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="badxml">
			<title>Bad XML</title>
			<slide>
				<title>Consistent Markup</title>
				<ul>
					<li>Decide on a strategy and stick to it</li>
					<li>Inconsistent markup is hard to work with</li>
					<li>Do not try to use markup itself for data representation</li>
					<ul>
						<li><q>attribute values in single quotes should be ignored</q></li>
						<li><q>empty elements using empty element tags have a special meaning</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Simple Markup</title>
				<ul>
					<li>XML can be read and edited by hand</li>
					<ul>
						<li>this depends on the application scenario and markup design</li>
						<li>human-accessible XML should be a markup design goal</li>
					</ul>
					<li>Tool requirements</li>
					<ul>
						<li>if your documents can only be used with tool xyz, something is wrong</li>
						<li>XML should be used for open data formats in open environments</li>
					</ul>
					<li>Undocumented side-effects</li>
					<ul>
						<li>data models may include more dependencies than encoded in the schema</li>
						<li>clearly document these side-effects so that users are warned</li>
						<li>if possible, document them in a machine readable way using <link href="schematron">a schema language</link></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="uglyxml">
			<title>Ugly XML</title>
			<slide id="redundant-data">
				<title>Redundant Data</title>
				<ul>
					<li>Redundant data is bad</li>
					<ul>
						<li>database design emphasizes <em>normalization</em> to eliminate redundant data</li>
						<li>normalization is difficult, creates complex structures, and makes data access slower</li>
						<li>real-life models and databases always contain redundancies</li>
					</ul>
					<li>Redundant data is used very frequently</li>
					<ul>
						<li>the <a href="http://zip4.usps.com/zip4/citytown_zip.jsp">ZIP code identifies state and city/cities</a></li>
						<li>very few address databases normalize street names (or numbers)</li>
					</ul>
					<li>Redundancy can be used for error detection/correction</li>
				</ul>
			</slide>
			<slide id="schema-redundancy">
				<title>Redundancy in the Schema</title>
				<ul>
					<li>Redundant data in schemas is very bad</li>
					<ul>
						<li>schema inspection cannot reveal the <q>same objective</q> behind the same markup</li>
						<li>further schema development will introduce inconsistencies</li>
					</ul>
					<li>Redundant data in schemas should be avoided</li>
					<ul>
						<li>schemas are a small and well-designed dataset</li>
						<li>schema design and maintenance are important issues</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Generically Generated Markup</title>
				<ul>
					<li>Some XML designers generate their schemas</li>
					<ul>
						<li>generated schemas are more likely to be not very well-designed</li>
						<li>the schema generation process may be poorly implemented</li>
					</ul>
					<li>Some schemas are based on a very generic markup</li>
					<ul>
						<li>the structure actually is in the content, not in the markup</li>
						<li>XML tools will not be very useful when working with these documents</li>
					</ul>
				</ul>
				<listing src="generic.xml" line="2-14"/>
			</slide>
		</part>
		<part id="infoset">
			<title short="Infoset">XML Information Set (XML Infoset)</title>
			<slide>
				<title>What is the Content of an XML Document?</title>
				<ul>
					<li>An interesting (and fruitless) discussion</li>
					<ul>
						<li>the content is whatever you consider it to be</li>
						<li>agreement between peers is necessary for data exchange</li>
						<li>agreement between specification writers and toolmakers is necessary to provide tools</li>
					</ul>
					<li>DOM and XSLT were two early arrivals</li>
					<ul>
						<li>both had an idea (and a model) of what the content of an XML document is</li>
						<li>they did not have the exact same idea</li>
					</ul>
					<li>Set a normative standard for an XML document's content</li>
					<ul>
						<li>the Infoset defines what is represented in the tree</li>
						<li>people should be confident to get this information when using XML technologies</li>
					</ul>
				</ul>
			</slide>
			<slide id="not-infoset">
				<title>What is <u>Not</u> in the Infoset</title>
				<ul>
					<li>Do not rely on <a href="http://www.w3.org/TR/xml-infoset/#omitted">information not available in the Infoset</a></li>
					<ul>
						<li>order of attributes</li>
						<li>type of quotes around attribute values</li>
						<li>notation of empty elements (<code>&lt;elem>&lt;/elem></code> vs. <code>&lt;elem/></code>)</li>
						<li>how lines are terminated</li>
						<li>entities and character references</li>
					</ul>
					<li>XML contains all this information if used as an XML document</li>
					<li>Many XML technologies are in fact Infoset technologies</li>
					<ul>
						<li>XML Schema, XSLT, XQuery, SOAP, ...</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML and Modeling</title>
				<ul>
					<li>XML is about representing structured data</li>
					<li>XML is a format for representing trees</li>
					<li>Data models often are not trees</li>
					<li>Mapping data models to trees can be done in many ways</li>
				</ul>
			</slide>
			<slide>
				<title>Assignment</title>
				<ul>
					<li><a href="a/2/">Assignment 2</a> is a simple Modeling task</li>
					<ul>
						<li>we provide a sample instance and some requirements</li>
						<li>create an XML version of the sample instance</li>
						<li>create a DTD which is more versatile than just working for the sample instance</li>
					</ul>
				</ul>
			</slide>
		</part>		
	</presentation>
	<presentation id="css" cover="slidycover">
		<title short="CSS">Cascading Style Sheets (CSS)</title>
		<date short="2006-09-12">Tuesday, September 12, 2006</date>
		<toc id="reading">Chapter 5 (pp. 164-204)</toc>
		<toc id="resources"><a href="http://www.w3.org/Style/CSS/">W3C CSS Specs</a>; <a href="http://jigsaw.w3.org/css-validator/">W3C CSS Validator</a></toc>
		<toc id="abstract"><em>Cascading Stylesheets (CSS)</em> have been designed as a language for better separating presentation-specific issues from the structuring of documents as provided by HTML. However, CSS can be applied to XML as well, either directly (by applying a CSS stylesheet to an XML document), or as a supplement to basic HTML layout structures generated from an XML document. CSS uses a simple model of <em>selectors</em> and <em>declarations</em>. Selectors specify to which elements of a document a set of declarations (each being a value assigned to a property) apply; in addition there is a model of how property values are inherited and cascaded. The biggest limitation of CSS is that it cannot change the structure of the displayed document.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<part>
			<title>Why CSS?</title>
			<slide>
				<title>Structure vs. Layout</title>
				<ul>
					<li>HTML started as very simple layout-oriented structures</li>
					<ul>
						<li>more layout control was introduced as attributes (<xml>align</xml>, <xml>color</xml>)</li>
						<li>HTML became increasingly <q>polluted</q> by layout information</li>
					</ul>
					<li>CSS was introduced as a format for layout information</li>
					<ul>
						<li>the HTML can be kept simple, containing only the structures</li>
						<li>layout information can be reused by using separate CSS files</li>
					</ul>
					<li>CSS has been designed for HTML</li>
					<ul>
						<li>it has been generalized to also cover XML</li>
						<li>the HTML heritage is still very visible in CSS</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>HTML vs. XML</title>
				<listing src="naked.html" line="2-17"/>
				<listing src="styled.xml" line="3-18"/>
			</slide>
			<slide>
				<title>HTML vs. XML</title>
				<ul>
					<li>HTML's built-in formatting rules can be expressed by CSS</li>
					<ul>
						<li>CSS has been extended to cover all HTML formatting</li>
						<li>any element can be defined to be formatted like an HTML element</li>
					</ul>
				</ul>
				<listing src="naked.css"/>
			</slide>
			<slide>
				<title>What's Missing?</title>
				<ol>
					<li>Restructuring content</li>
					<ul>
						<li>CSS assigns formatting properties to elements</li>
						<li>the document tree which is formatted cannot be restructured</li>
						<li>parts can be ignored or new parts can be inserted</li>
					</ul>
					<li>Interpreting content</li>
					<ul>
						<li><elem>img</elem> has a lot of special meanings attached</li>
						<li>all form elements have very special semantics</li>
					</ul>
				</ol>
			</slide>
		</part>
		<part>
			<title>How CSS Works</title>
			<slide>
				<title>CSS in Action</title>
				<listing src="zengarden.html" line="17-30" href="http://www.csszengarden.com/"/>
			</slide>
			<slide>
				<title>HTML and CSS</title>
				<ul>
					<li>CSS specifies how HTML elements are formatted</li>
					<ol>
						<li>formatting can be attached to every element (redundant inside document)</li>
						<li>formatting can be included in document (redundant across documents)</li>
						<li>separate CSS files describe the formatting (best reuse)</li>
					</ol>
					<li>Any combination of these methods is possible</li>
				</ul>
				<listing src="css-usage.html" line="3-13"/>
			</slide>
			<slide>
				<title>XML and CSS</title>
				<ul>
					<li>HTML has special elements &amp; attributes</li>
					<ul>
						<li><elem>link</elem> and <elem>style</elem> as header elements</li>
						<li>the <xml>style</xml> attribute on all body elements</li>
					</ul>
					<li>XML has no fixed set of elements or attributes</li>
					<ul>
						<li>it would have been possible to define a special <q>CSS namespace</q></li>
						<li>instead, it was decided to <a href="http://www.w3.org/TR/xml-stylesheet/" title="W3C specification: Associating Style Sheets with XML documents">have a processing instruction for making the connection</a></li>
					</ul>
				</ul>
			<listing src="styled.xml" line="1-5"/>
			<ul>
				<li>The CSS then must select the elements</li>
			</ul>
			<listing src="naked.css" line="1-2"/>
			</slide>
			<slide>
				<title>Formatting Model</title>
				<ul>
					<li>Properties are central to the CSS formatting model</li>
					<ol>
						<li>create a document tree</li>
						<li>identify the media type (e.g., <css>screen</css> or <css>print</css>)</li>
						<li>retrieve all stylesheets required for the media type</li>
						<li>assign values to all properties in the document tree</li>
						<li>generate a <em>formatting structure</em> (a different tree)</li>
						<li>render the formatting structure on the target medium</li>
					</ol>
					<li>Properties control the rendering of elements</li>
					<li>Styling in CSS means assigning values to properties</li>
				</ul>
			</slide>
		</part>
		<part id="css-properties">
			<title>Properties</title>
			<slide>
				<title>Formatting Instructions</title>
				<ul>
					<li>Properties define how elements are formatted</li>
					<ul>
						<li>they define a specific facet of formatting</li>
						<li>they may have interdependencies with other properties</li>
						<li>they can be assigned explicitly</li>
						<li>they may be defined through <link href="css-cascading"/> or <link href="css-inheritance"/></li>
					</ul>
					<li>A property has a name and is used in a name/value-pair</li>
					<ul>
						<li>the name identifies the property that is being set</li>
						<li>the value space depends on the property</li>
						<li>some properties accept complex values</li>
						<li>sets of values: <css>p { font : bold italic large Palatino }</css></li>
						<li>sequences of values: <css>p { font-family : "Segoe UI", verdana, helvetica, arial, sans-serif }</css></li>
					</ul>
					<li>Property specifications can be grouped</li>
					<ul>
						<li><css>.thinboxed { border-width : 1px ; padding : 10px ; margin : 5px }</css></li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>CSS1 Properties</title>
				<slide>
					<title>Factoring out HTML</title>
					<ul>
						<li>CSS1 was published in <a href="http://www.w3.org/TR/REC-CSS1-961217">1996</a> and revised in <a href="http://www.w3.org/TR/1999/REC-CSS1-19990111">1999</a></li>
						<li>HTML suffered from too many attributes</li>
						<ul>
							<li>layout information was specified as CSS</li>
							<li>style attributes in HTML were marked as <q>deprecated</q></li>
						</ul>
						<li>A small set of formatting features as CSS properties</li>
						<ul>
							<li>font: <css>p { font : 80% sans-serif }</css></li>
							<li>color and background: <css>body { background : url(logo.jpeg) right top }</css></li>
							<li>text: <css>h1 { text-transform : uppercase }</css></li>
							<li>box: <css>p.quote { border-style : solid dotted }</css></li>
							<li>classification: <css>img { display : none }</css></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS2 Properties</title>
				<slide>
					<title>CSS2</title>
					<ul>
						<li>CSS2 was published in <a href="http://www.w3.org/TR/1998/REC-CSS2-19980512/">1998</a> and is <a href="http://www.w3.org/TR/CSS21/">still being  revised (CSS2¹)</a></li>
						<li>CSS2¹ is what you can expect from modern browsers</li>
						<ul>
							<li>with IE (even IE7) being the exception</li>
						</ul>
						<li>CSS2 is a single and coherent specification</li>
						<ul>
							<li>CSS3 is a jungle of concurrent module development</li>
							<li>CSS3 will never be finished (some modules will, though)</li>
						</ul>
					</ul>
				</slide>
				<slide id="generated-content">
					<title>Generated Content</title>
					<ul>
						<li>CSS1 had no way of adding information to the document</li>
						<ul>
							<li>by using <css>display</css>, parts of the document could be ignored</li>
						</ul>
						<li><em>Generated content</em> allows content to come from the CSS</li>
						<ul>
							<li>only possible with <css>:before</css> and <css>:after</css> <em>pseudo-elements</em></li>
							<li>static strings: <css>p.abstract:before { content : "Abstract: " }</css></li>
							<li>special effects like <q>quotes</q>: <css>q:before { content : open-quote } </css></li>
							<li>counters: <css>h1:before { content: "Chapter " counter(chapter) ". " ; counter-increment : chapter }</css></li>
						</ul>
						<li>Quotes can be defined as being language dependent</li>
						<ul>
							<li><css>q:lang(en) { quotes : '"' '"' "'" "'" }</css></li>
							<li><css>q:lang(no) { quotes : "«" "»" '"' '"' }</css></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Tables</title>
					<ul>
						<li>CSS1 did not address table formatting</li>
						<ul>
							<li>table layout still had to be done using HTML attributes</li>
							<li>a lot of redundant code specifying cell alignment and borders</li>
						</ul>
						<li>CSS2 introduced tables on the CSS level</li>
					</ul>
					<pre>table    { display: table }
tr       { display: table-row }
thead    { display: table-header-group }
tbody    { display: table-row-group }
tfoot    { display: table-footer-group }
col      { display: table-column }
colgroup { display: table-column-group }
td, th   { display: table-cell }
caption  { display: table-caption }</pre>
				</slide>
				<slide>
					<title>Fixed vs. Automatic Table Layout</title>
					<ul>
						<li>HTML defines a complex table rendering algorithm</li>
						<ul>
							<li>tables are rendered incrementally</li>
							<li>table layout is determined by looking at the complete table</li>
						</ul>
					</ul>
					<table width="90%" cellspacing="10%">
						<thead>
							<tr>
								<th>Automatic</th>
								<th>Fixed</th>
							</tr>
						</thead>
						<tr>
							<td width="45%">
								<table border="1">
									<tr>
										<td>col 1 row 1</td>
										<td>col 2 row 1 col 2 row 1</td>
										<td>col 3 row 1 col 3 row 1 col 3 row 1</td>
									</tr>
									<tr>
										<td>col 1 row 2</td>
										<td>col 2 row 2 col 2 row 2</td>
										<td>col 3 row 2 col 3 row 2 col 3 row 2</td>
									</tr>
								</table>
							</td>
							<td width="45%">
								<table border="1" style="table-layout : fixed ; ">
									<tr>
										<td width="33%">col 1 row 1</td>
										<td width="33%">col 2 row 1 col 2 row 1</td>
										<td width="33%">col 3 row 1 col 3 row 1 col 3 row 1</td>
									</tr>
									<tr>
										<td>col 1 row 2</td>
										<td>col 2 row 2 col 2 row 2</td>
										<td>col 3 row 2 col 3 row 2 col 3 row 2</td>
									</tr>
								</table>
							</td>
						</tr>
					</table>
					<ul>
						<li>Clipping of contents allows more freedom</li>
						<ul>
							<li>HTML tables are designed to show everything</li>
							<li>many applications work better <a href="http://dret.net/glossary/xml">when table contents are clipped</a></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS3 Properties</title>
				<slide>
					<title>CSS3</title>
					<ul>
						<li>CSS3 is modularized and huge</li>
						<li>Developments for different applications and scenarios</li>
						<ul>
							<li><a href="http://www.w3.org/Style/CSS/current-work">under construction</a> for some time to come</li>
							<li>implementations have to wait until the modules are more stable</li>
						</ul>
						<li>CSS3 contains many powerful features</li>
						<ul>
							<li>more powerful features mean a higher fall when <em>fallback behavior</em> occurs</li>
							<li>CSS3 modules will probably undergo evolutionary selection and mutation</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Multi-Column Layout</title>
					<ul>
						<li>Most column-based layouts use tables</li>
						<ul>
							<li>table columns are filled left-to-right, then top-to-bottom</li>
							<li>multicolumns allow content to flow between columns</li>
						</ul>
						<li>Multicolumn layouts are used in many Web pages</li>
						<li>Publishing tools are good at hiding the <elem>table</elem> problems</li>
					</ul>
					<listing src="multicol.html" line="2-14"/>
				</slide>
			</part>
		</part>
		<part id="css-selectors">
			<title>Selectors</title>
			<slide>
				<title>Select and Style</title>
				<ul>
					<li><link href="css-properties"/> are applied to elements</li>
					<ul>
						<li>properties can be directly applied in an element's <attr>style</attr> attribute</li>
						<li>in all other cases, <em>selectors</em> are used to select the styled elements</li>
					</ul>
					<li>Selectors are good for reusable CSS code</li>
					<ul>
						<li>identifying the most appropriate formatting classes is not easy</li>
						<li>planning for CSS for a larger site is a difficult task</li>
					</ul>
					<li>CSS project management should separate selectors and properties</li>
					<ol>
						<li>selectors are about which things should be identified and styled</li>
						<li>properties are about how this styling is implemented</li>
					</ol>
				</ul>
			</slide>
			<part id="css1-selectors">
				<title>CSS1 Selectors</title>
				<slide>
					<title>CSS for Dummies</title>
					<ul>
						<li>Very small set of selectors</li>
						<ul>
							<li>selecting elements by name: <css>h1 { font-size : large }</css></li>
							<li>selecting elements by their <xml>id</xml>: <css>#author { font-weight : bold }</css></li>
							<li>selecting elements by their <xml>class</xml>: <css>.abstract { font-size : small }</css></li>
							<li>combining these mechanisms: <css>p.warning { color : red } </css></li>
						</ul>
						<li>Pseudo-classes and -elements allow interesting effects</li>
						<ul>
							<li><elem>a</elem> links have state: <css>a:visited</css> and <css>a:active</css></li>
							<li>selection without markup: <css>p:first-letter</css> and <css>p:first-line</css></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS2 Selectors</title>
				<slide>
					<title>More Selectors</title>
					<ul>
						<li><link href="css1-selectors"/> are available</li>
						<ul>
							<li>element name, <attr>id</attr>, <attr>class</attr>, and combinations of these</li>
						</ul>
						<li>CSS2 introduced many new selectors</li>
						<ul>
							<li>descendants: <css>ul li { font-style : italic }</css></li>
							<li>children: <css>ul > li { font-style : italic }</css></li>
							<li>adjacent siblings: <css>h1 + h2 { margin-top : 0.5em }</css></li>
							<li>attribute matching: <css>h1[lang=nl] { color : orange }</css></li>
						</ul>
						<li>CSS2 selectors are sufficient for most tasks</li>
						<li>Setting <attr>class</attr> attributes is very important</li>
					</ul>
				</slide>
				<slide>
					<title>CSS2 Pseudo Classes</title>
					<ul>
						<li><link href="css1-selectors">CSS1's pseudo-elements</link> are available</li>
						<ul>
							<li>link states and first letter and line of content</li>
						</ul>
						<li>CSS2 adds more qualifications for elements</li>
						<ul>
							<li>first child: <css>p:first-child { text-indent : 0 }</css></li>
							<li>dynamic behavior: <css>a:hover { ... } a:active { ... } a:focus { ... }</css></li>
							<li>language: <css>:lang(de) { quotes: '»' '«' '‹' '›' }</css></li>
							<li><link href="generated-content"/>: <css>q:before { content : open-quote } q:after { content : close-quote }</css></li>
						</ul>
						<li>Support for <em>Internationalization (I18N)</em> and <em>Localization (L10N)</em></li>
					</ul>
				</slide>
			</part>
			<part>
				<title>CSS3 Selectors</title>
				<slide>
					<title>CSS goes XPath</title>
					<ul>
						<li>CSS3 selectors introduce a wide array of new features</li>
						<ul>
							<li><link href="xpath">XPath</link> is a very general selection mechanism</li>
							<li>CSS3 re-invents some XPath features using new names</li>
							<li>other selectors are based on dynamic information, which is more useful</li>
						</ul>
						<li>Some ideas are very useful</li>
						<ul>
							<li><a href="http://dret.net/netdret/publications#wil98">highlighting targets</a>: <css>*:target { outline : red thin solid }</css></li>
							<li>selection highlighting: <css>*:selection { ... }</css></li>
							<li>(form) element states: <css>input:disabled { ... }</css></li>
						</ul>
						<li>Adoption and demand for other selectors is unclear</li>
						<ul>
							<li>attribute substrings: <css>p[title*="hello"] { ... }</css></li>
							<li>counting children: <css>p:nth-child(42) { ... }</css></li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>CSS Mechanics</title>
			<slide id="css-cascading">
				<title>Cascading</title>
				<ul>
					<li>Stylesheets may have three different origins</li>
					<ol>
						<li><em>page authors</em> associate CSS with their pages</li>
						<li><em>users</em> configure their browser to use some CSS</li>
						<li><em>user agents (browsers)</em> have built-in CSS defining how to style content</li>
					</ol>
					<li>Conflicts must be resolved using the following algorithm</li>
					<ol>
						<li>find all matching declarations (matching media type and selector)</li>
						<li>sort according to importance (browser &lt; user &lt; author)</li>
						<li>same importance must be sorted by specificity (more specific selectors)</li>
						<li>finally, sort by order in which they were specified</li>
					</ol>
					<li><css>!important</css> rules can influence the algorithm</li>
					<ul>
						<li>they are interpreted in step 2 (sorting by importance)</li>
						<li>browser &lt; user &lt; author &lt; author(important) &lt; user(important)</li>
					</ul>
				</ul>
			</slide>
			<slide id="css-inheritance">
				<title>Inheritance</title>
				<ul>
					<li>Properties often are inherited by children</li>
					<ul>
						<li>setting a table's <css>color</css> sets the <css>color</css> for all contents</li>
						<li>without inheritance, CSS stylesheets would have to be very large</li>
					</ul>
					<li>Inheritance is mostly intuitive</li>
					<ul>
						<li>in reality, it is a bit more complicated</li>
					</ul>
					<ol>
						<li><em>specified value:</em> what the property specified (<link href="css-cascading"/>, inheritance, or initial)</li>
						<li><em>computed value:</em> computed based on the environment (e.g., <css>ex</css> → <css>px</css>)</li>
						<li><em>used value:</em> converted to an absolute value (e.g., percentage widths)</li>
						<li><em>actual value:</em> specific for the environment (e.g., borders with pixel fractions)</li>
					</ol>
				</ul>
			</slide>
			<slide id="css-import">
				<title>Structuring Stylesheets</title>
				<ul>
					<li>Stylesheets may need to be structured</li>
					<ul>
						<li>importing CSS code is supported: <css>@import "/dretnet.css" ;</css></li>
						<li>modules of CSS code can be reused in different contexts</li>
					</ul>
					<li>Stylesheets may be specific for a media type</li>
					<ul>
						<li><em>braille, embossed, handheld, print, projection, screen, speech, tty, tv</em></li>
						<li>specified in HTML: <elem>link rel="stylesheet" type="text/css" media="print" href="/print.css"</elem></li>
						<li>specified in CSS: <css>@media print { ... }</css></li>
						<li>media-dependent import: <css>@import "/print.css" print ;</css></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>CSS for Document Styling</title>
				<ul>
					<li>Appropriate for HTML</li>
					<ul>
						<li>Flexible selection of elements using <link href="css-selectors"/></li>
						<li>Powerful formatting of elements using <link href="css-properties"/></li>
						<li>Interesting interface design with <em>pseudo-classes</em> and <em>-elements</em></li>
					</ul>
					<li>Inappropriate for XML</li>
					<ul>
						<li>Assigning values to properties is too simple</li>
						<li>XML documents often need to be restructured</li>
						<li><link href="xslt1">XSLT</link> is the language for restructuring XML</li>
						<li>XML → HTML+CSS is a popular Web publishing setup</li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xmlns" cover="slidycover">
		<title short="Namespaces">XML Namespaces</title>
		<date short="2006-09-14">Thursday, September 14, 2006</date>
		<toc id="reading"><a href="http://www.rpbourret.com/xml/NamespacesFAQ.htm#p1">XML Namespaces FAQ (Part I)</a></toc>
		<toc id="resources"><a href="http://www.w3.org/TR/REC-xml-names/">W3C's <q>XML Namespaces</q> Specification</a></toc>
		<toc id="abstract">XML is successful because it can be used in many different scenarios, and because it is easy to define a schema (such as a DTD) for new scenarios, producing a tailored XML data model for this scenario. This means that names in XML documents must be interpreted as belonging to a certain schema. As long as a document uses names from only one schema, this can be done rather easily. However, in many scenarios today documents combine names from different schemas, and <em>XML Namespaces</em> provide a mechanism how the names in an XML document can be associated with a namespace.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>Class Survey</title>
			<table width="90%" style="margin : 3%">
				<tr>
					<td align="center" valign="middle">
						<img src="survey1.gif"/>
					</td>
					<td align="center" valign="middle">
						<img src="questionmark.jpg"/>
					</td>
				</tr>
			</table>
		</slide>
		<part>
			<title>How to think about Namespaces</title>
			<slide>
				<title>Namespaces are Simple</title>
				<ul>
					<li>XML Namespaces are often misunderstood</li>
					<ul>
						<li>the biggest problem is to get rid of some assumptions</li>
						<li>XML Namespaces are too simple and thus confusing</li>
					</ul>
					<li>Instincts of Web users</li>
					<ol>
						<li>URIs identify something that can be retrieved by a browser</li>
						<li>URIs identify something that can be displayed by a browser</li>
						<li>if I cannot get it and cannot look at it, what good can it be?</li>
					</ol>
					<li>However, these assumptions are not always true</li>
					<ol>
						<li>URIs identify <em>resources</em> which often, but not always, can be accessed over the Web</li>
						<li>URIs identify <em>resources</em> which often, but not always, have a Web-accessible representation</li>
						<li>sharing URIs means sharing an identity, which can mean sharing semantics (associated with this identity)</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Simple Examples</title>
				<listing src="mathml1.xml" line="2-6"/>
				<listing src="mathml2.xml" line="2-6"/>
				<listing src="mathml3.xml" line="2-6"/>
				<listing src="mathml4.xml" line="2-6"/>
			</slide>
			<slide>
				<title>Name Spaces</title>
				<ul>
					<li>Names are one form of identification</li>
					<li>Identification is essential for communications</li>
					<li>Names in XML are not suitable for identification</li>
					<ul>
						<li>they are local to their context (where they are defined)</li>
						<li>if the context is uniquely identified, the names would be, too</li>
					</ul>
					<li>Name Spaces: <em>Put names into spaces</em></li>
					<ul>
						<li>how to identify the space? Web things are identified by URIs</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>URI Philosophy</title>
				<ul>
					<li><link href="uri"/> uniquely identify resources</li>
					<li>URIs often provide access information</li>
					<ul>
						<li>pretty clear in <code>http://dret.net/lectures/xml-fall06/</code></li>
						<li>less clear in <code>urn:ietf:rfc:2648</code>  (<a href="http://dret.net/biblio/reference/rfc2648">RFC 2648</a>)</li>
						<li>very (and purposely) unclear in <code>tag:9327493874329</code>  (<a href="http://dret.net/biblio/reference/rfc4151">RFC 4151</a>)</li>
					</ul>
					<li>URIs often return <em>resource representations</em></li>
					<ul>
						<li>the resource itself is never returned (how to return a <em>lecture</em>?)</li>
						<li>some representation often is useful (HTML, PDF, maybe video/audio)</li>
						<li>the resource exists and is useful without a representation!</li>
					</ul>
					<li>URIs are much more than just addresses of HTML pages</li>
				</ul>
			</slide>
			<slide>
				<title>The Namespace Problem</title>
				<ul>
					<li>People assume that URIs point to Web pages</li>
					<ul>
						<li>a <em>namespace name</em> (a URI) may point to a Web page</li>
						<li>it may also have no Web page associated with it</li>
						<li>it may even use a URI scheme which cannot be retrieved</li>
						<li>but it is still possible to compare URIs!</li>
					</ul>
					<li>People assume some standardized content format</li>
					<ul>
						<li>friendly namespaces provide HTML portals (<a href="http://www.w3.org/1999/xhtml">XHTML</a>)</li>
						<li>some namespaces just give you the schema (<a href="http://www.w3.org/2001/12/soap-envelope">SOAP</a>)</li>
						<li>less friendly namespaces provide minimal information (<a href="http://www.w3.org/1999/XSL/Transform">XSLT</a>)</li>
						<li>very unfriendly namespaces may return a 404 or even use inaccessible schemes</li>
						<li>but they all are valid, because no resource representation is required!</li>
					</ul>
					<li>Namespaces are used by comparing URIs</li>
					<ul>
						<li>anything else may be useful, but is not strictly required</li>
						<li>when searching for a namespace definition, use Google (string search)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Using Namespaces</title>
			<slide>
				<title>Declaring Namespaces</title>
				<ul>
					<li>Using a namespace means referencing names from it</li>
					<ul>
						<li>unfortunately, there is no really standard way of writing these names</li>
						<li>(the <q><a href="http://www.jclark.com/xml/xmlns.htm">Clark notation</a></q> is useful: <code>{http://www.w3.org/1999/xhtml}html</code>)</li>
						<li>Namespaces are declared and then used</li>
					</ul>
					<li><xml>xmlns</xml>-prefixed attributes are used for declaring namespaces</li>
					<ul>
						<li>Default: <elem>html xmlns="http://www.w3.org/1999/xhtml"</elem></li>
						<li>Prefix: <elem>xhtml:html xmlns:xhtml="http://www.w3.org/1999/xhtml"</elem></li>
					</ul>
					<li>Namespace declarations are inherited and can be overwritten</li>
					<ul>
						<li>the default namespace can be undeclared</li>
						<li>Namespace declarations can be used in a myriad of ways</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Unhealthy Namespace Usages</title>
				<ul>
					<li>Namespaces can be (and are) used in very weird ways</li>
					<ul>
						<li>these are syntax variations of identical structures</li>
						<li>without a good (i.e., conforming) parser, interpretation is very hard</li>
						<li>copy/paste can become hard or impossible</li>
					</ul>
					<li>Namespaces can be <a href="http://lists.xml.org/archives/xml-dev/200204/msg00170.html">neurotic, psychotic, borderline, or normal</a></li>
					<li>Each of the insane cases complicates processing</li>
					<li>None of these has any real technical inaccuracies</li>
					<li>XML should be used with humans in mind</li>
				</ul>
			</slide>
			<slide>
				<title>Unhealthy Namespace Usages in Practice</title>
				<listing src="neurotic.xml" line="2-9"/>
				<listing src="borderline.xml" line="2-9"/>
				<listing src="psychotic.xml" line="2-9"/>
			</slide>
			<slide>
				<title>Elements and Attributes</title>
				<ul>
					<li>Namespaces often apply to elements and attributes</li>
					<ul>
						<li>if an element name has no prefix, it has no namespace or the default namespace associated</li>
						<li>if a name has a prefix, the prefix must be bound to a namespace name</li>
						<li>names like this are called <em>Qualified Names (QNames)</em></li>
					</ul>
					<li>Elements and Attributes are treated differently</li>
					<ul>
						<li>the default namespace only applies to unprefixed element names</li>
						<li>unprefixed attribute names are in no namespace</li>
						<li><link href="xsd1">XML Schema</link> deals with this by <link href="xsd-names">keeping attributes <q>local</q></link></li>
					</ul>
					<li>Applications should interpret QNames</li>
					<ul>
						<li>naïve implementations will break when processing unhealthy instances</li>
						<li>the mechanics of implementing namespaces are not very hard</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Other Usages</title>
				<ul>
					<li>Increasingly, QNames are used in content</li>
					<ul>
						<li><link href="xslt1">XSLT</link> was the first specification using this</li>
						<li>many other technologies have followed</li>
					</ul>
				</ul>
				<pre><![CDATA[<xsl:template match="section" xmlns:mathml="http://www.w3.org/1998/Math/MathML">
<xsl:if test="exists(.//mathml:*)">]]></pre>
				<ul>
					<li>Technically, everything is well-defined</li>
					<ul>
						<li>for processing, the namespace bindings must be known</li>
						<li>copy/paste on a textual basis may not work or even work wrong</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Defining Namespaces</title>
			<slide>
				<title>Any URI is Possible</title>
				<ul>
					<li>A namespace name is a URI, that's all!</li>
					<ul>
						<li>it may not be accessible (because of the URI scheme)</li>
						<li>when retrieving it, nothing may be returned</li>
						<li>when retrieving it, something may be returned</li>
					</ul>
					<li>The only important thing is <em>the name</em></li>
					<ul>
						<li>the name is mentioned in the documentation</li>
						<li>if you know the documentation, you know the name</li>
						<li>shared names mean shared knowledge</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespace Definitions</title>
				<ul>
					<li>Namespaces can be defined by a DTD (<a href="http://www.w3.org/TR/xhtml1/#strict">XHTML</a>)</li>
					<li>Namespaces can be defined by an XML Schema (<a href="http://www.w3.org/TR/soap12-part1/#tabnsprefixes">SOAP</a>)</li>
					<li>Namespaces can be defined by RELAX NG (<a href="http://www.w3.org/TR/xhtml2/conformance.html#strict">XHTML 2.0</a>)</li>
					<li>Namespaces can be defined by prose (<a href="http://www.w3.org/TR/xslt#xslt-namespace">XSLT</a>)</li>
					<li>If schemas are provided, additional information is required</li>
					<ul>
						<li>it is unlikely that a namespace can be fully described by a schema</li>
						<li>additional constraints and semantics are specified in prose</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Structured Namespaces</title>
				<ul>
					<li>Namespaces have no structure</li>
					<ul>
						<li>a collection of names grouped by their namespace name</li>
						<li>inside the namespace, names have local meaning</li>
					</ul>
					<li>Namespace definitions are free to make up their own rules</li>
					<ul>
						<li>but then they must also make rules how to deal with conflicts</li>
					</ul>
					<li>XML Schema <a href="http://www.w3.org/TR/xmlschema-1/#concepts-nameSymbolSpaces">structures the namespace defined by a schema</a></li>
					<ul>
						<li>the different <q>parts</q> of the namespace are called <em>symbol spaces</em></li>
						<li>all XML Schema components have their own symbol space</li>
						<li><em>simple</em> and <em>complex types</em> share the same symbol space</li>
						<li>locally defined elements/attributes are in <q>sub symbol spaces</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Fixed or Extensible?</title>
				<ul>
					<li>Can a namespace change over time?</li>
					<ul>
						<li>may the namespace description become outdated? extended? replaced?</li>
						<li>this should be clearly documented in the namespace description</li>
					</ul>
					<li>The XML XML Namespace was widely believed <a href="http://www.w3.org/XML/1998/namespace">to be defined by XML</a></li>
					<ul>
						<li><xml>xml:lang</xml> and <xml>xml:space</xml> defined by XML</li>
						<li><xml>xml:base</xml> was added by <em>XML Base</em></li>
						<li><xml>xml:id</xml> was added by <em>xml:id</em></li>
					</ul>
					<li>When defining namespaces, plan ahead and publish everything</li>
					<ul>
						<li>dependencies, change management, and versioning issues are important</li>
						<li>there still is no accepted standard for namespace descriptions</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespace Descriptions</title>
				<img style="width : 90% ; margin : 2% ; " src="ns-description.png"/>
				<p class="quotenote"><a href="http://dret.net/netdret/publications#wil06h">Erik Wilde, <q>Structuring Namespace Descriptions</q>, 15th International World Wide Web Conference (WWW2006), Edinburgh, UK, May 2006.</a></p>
			</slide>
		</part>
		<part>
			<title>Processing Namespaces</title>
			<slide id="namespace-validity">
				<title>Namespaces and Validity</title>
				<ul>
					<li>Namespaces define an additional layer on top of XML</li>
					<ul>
						<li>they define additional semantics (assignment to namespaces)</li>
						<li>they define additional constraints (declaration and usage of namespaces)</li>
					</ul>
					<li>Namespace-awareness is a basic requirement for XML tools</li>
					<ul>
						<li>XML not compliant with XML Namespaces will break most tools</li>
						<li>processing namespaces should be done by tools</li>
						<li>a namespace-aware parser translates namespace declarations into nodes</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Namespaces in the Document</title>
				<listing src="mathml4.xml"/>
			</slide>
			<slide>
				<title>Namespaces in the Tree</title>
				<img src="xmlns-tree.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Name Spaces</title>
				<ul>
					<li><q>Bags of Names</q> with a URI as a label</li>
					<li>The URI does not necessarily return anything</li>
					<li>Namespaces can be defined in any way (e.g., schemas)</li>
					<li><a href="a/3/">Assignment 3</a> asks for CSS for simple HTML</li>
					<ul>
						<li>possible inspirations: <a href="http://www.csszengarden.com/">CSS Zen Garden</a></li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xpath" cover="slidycover">
		<title short="XPath">XML Path Language (XPath)</title>
		<date short="2006-09-19">Tuesday, September 19, 2006</date>
		<toc id="reading"><a href="xpath-chapter.pdf">XPath Chapter</a></toc>
		<toc id="resources"><a href="xpath-quickref.pdf">XPath QuickRef</a></toc>
		<toc id="abstract">XML structures data into a rather small number of different constructs, most notably elements and attributes. The <em>XML Path Language (XPath)</em> defines a way how to select parts of XML documents, so that they can be used for further processing. XPath's primary use is in <em>XSL Transformations (XSLT)</em>, but other XML technologies use it as well, e.g. XML Schema. XPath is a very compact language with a syntax that resembles the path expressions which are well-known from file systems. These path expressions, however, are generalized and therefore much more powerful than the rather simple path expressions in file systems. Because of its use in different XML technologies, XPath is one of the most important XML core technologies.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<part>
			<title>Why XPath?</title>
			<slide>
				<title>Selecting Parts of XML Documents</title>
				<ul>
					<li>XML is a syntax for trees</li>
					<ul>
						<li>it defines a way for how trees can be exchanged</li>
					</ul>
					<li>XML technologies should provide for working with trees</li>
					<ul>
						<li>when receiving trees, access to the tree should be easy (DOM)</li>
						<li>validating trees should be easy (<link href="xsd1">XML Schema</link>)</li>
						<li>mapping trees should be easy (<link href="xslt1">XSLT</link>)</li>
						<li>XPath is like regular expressions for text-based information</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Making Selection Reusable</title>
				<ul>
					<li>Different XML technologies need selection</li>
					<ul>
						<li><link href="xslt1">XSLT</link> needs it for selecting parts and manipulating them</li>
						<li><link href="xsd1">XML Schema</link> needs it for applying identity constraints</li>
						<li>DOM needs it for extracting parts from an XML tree</li>
						<li>XQuery needs it for writing XML-oriented queries</li>
					</ul>
					<li>XPath was created to be reusable</li>
					<ul>
						<li>XML experts should only learn one selection language</li>
						<li>this knowledge can be reused when learning new technologies</li>
						<li>implementations can reuse code libraries</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>How XPath Evolved</title>
				<ul>
					<li>XSL was designed as the new XML stylesheet language</li>
					<ol>
						<li><link href="xslt1">XSL Transformations (XSLT)</link> transform the input document</li>
						<li><em>XSL Formatting Objects (XSL-FO)</em> is what they will transform it to</li>
					</ol>
					<li>XSLT was designed to work on arbitrary XML input documents</li>
					<ul>
						<li>started as a part of XSL (<a href="http://www.w3.org/TR/1998/WD-xsl-19981216">WD-xsl-19981216</a> → <a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a>)</li>
						<li>the application area was XSL-FO, but not strictly limited to that</li>
						<li>for selecting parts of the transformation input, a selection mechanism had to be provided</li>
					</ul>
					<li>XPath was turned into a standalone specification</li>
					<ul>
						<li>started as a part of XSLT (<a href="http://www.w3.org/TR/1999/WD-xslt-19990421">WD-xslt-19990421</a> → <a href="http://www.w3.org/1999/07/WD-xslt-19990709">WD-xslt-19990709</a>)</li>
						<li>reused in a number of other W3C specifications (XML Schema, DOM, XQuery)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>How XPath Works</title>
			<part id="xpath-tree">
				<title>The XPath Tree Model</title>
				<slide>
					<title>Starting from the Infoset</title>
					<ul>
						<li>XPath operates on an abstract data model</li>
						<ul>
							<li>a tree derived from the <link href="infoset"/></li>
							<li>a simplification (another one!) of the underlying XML</li>
						</ul>
						<li>The Infoset is turned into an <em>XPath node tree</em></li>
						<ul>
							<li>11 infoset item types → 7 XPath node tree node types</li>
							<li>character items are merged into text nodes</li>
							<li>namespace declarations are no longer visible as attributes</li>
						</ul>
					</ul>
				</slide>
				<slide id="not-xpath">
					<title>What is <u>Not</u> in the XPath Tree</title>
					<ul>
						<li>The same things which are <link href="not-infoset">not in the Infoset</link></li>
						<ul>
							<li>the order of attributes in a start tag</li>
							<li>the types of quotes around attribute values</li>
							<li>character references and entities (<code>&#xFC;</code>/<code>&amp;uuml;</code> → <code>ü</code>)</li>
						</ul>
						<li>And some more...</li>
						<ul>
							<li>namespace declarations are no longer visible as attributes</li>
							<li>notations and unexpanded entity references</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>XPath Evaluation</title>
				<slide>
					<title>Tree In / Selection Out</title>
					<ul>
						<li>XPath evaluates an expression based on a tree</li>
						<li>Where the tree comes from is out of XPath's scope</li>
						<li>The result of the evaluation is a selection</li>
						<ul>
							<li><code>//img[not(@alt)]</code> → select all images which have no <code>alt</code> attribute</li>
							<li><code>count(//img)</code> → return the number of images</li>
							<li><code>/descendant::img[3]/@src</code> → return the third image's <code>src</code> URI</li>
							<li><code>starts-with(/html/@lang, 'en')</code> → test whether the document's language is English</li>
						</ul>
						<li>Syntax errors may occur</li>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title short="Location Paths">XPath Location Paths</title>
			<slide>
				<title>Location Path Structure</title>
				<ul>
					<li>Each location path consists of <em>Location Steps</em></li>
					<ul>
						<li>location steps are separated by <q><code>/</code></q>, like path names in file systems</li>
					</ul>
					<li>Similarities between XPath location paths and file systems</li>
					<ol>
						<li>nodes in the <link href="xpath-tree">XPath tree</link> have different types</li>
						<li>the <link href="xpath-nodetest">type and number of nodes selected by one step</link></li>
						<li>the <link href="xpath-axes">direction in which each step moves</link></li>
						<li>additional <link href="xpath-predicates">filters for selecting specific nodes</link></li>
					</ol>
					<li>Differences between XPath location paths and file systems</li>
					<ol>
						<li>XPaths may return <link href="xpath-expressions">other data types than nodes</link></li>
						<li>XPath provides a <link href="xpath-functions">built-in function library</link></li>
					</ol>
				</ul>
			</slide>
			<part>
				<title short="Node Tests">XPath Node Tests</title>
				<slide>
					<title>File System vs. XPath Paths</title>
					<table style="margin : 5% ; " width="85%">
						<tr>
							<th>File System Path:</th>
							<td align="center"><code>/</code></td>
							<td align="center"><code>usr</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>local</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>apache</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>bin</code></td>
							<td align="center"><code>/</code></td>
						</tr>
						<tr>
							<th># Selected Nodes:</th>
							<td align="center">1</td>
							<td align="center">→ 1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">1</td>
						</tr>
					</table>
					<table style="margin : 5% ; " width="85%">
						<tr>
							<th>XPath:</th>
							<td align="center"><code>/</code></td>
							<td align="center"><code>html</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>body</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>table</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>thead</code></td>
							<td align="center"><code>/</code></td>
							<td align="center"><code>tr</code></td>
						</tr>
						<tr>
							<th># Selected Nodes:</th>
							<td align="center">1</td>
							<td align="center">→ 1</td>
							<td align="center">→</td>
							<td align="center">1</td>
							<td align="center">→</td>
							<td align="center">6</td>
							<td align="center">→</td>
							<td align="center">4</td>
							<td align="center">→</td>
							<td align="center">12</td>
						</tr>
					</table>
				</slide>
				<slide id="xpath-nodetest">
					<title>Tests for Nodes</title>
					<ul>
						<li>Name tests</li>
						<ul>
							<li>testing for a particular name (elements/attributes): <code>/html/head/title</code></li>
							<li>wildcards (testing for any name): <code>/html/head/*</code></li>
						</ul>
						<li>Node type tests</li>
						<ul>
							<li>text nodes: <code>text()</code></li>
							<li>comment nodes: <code>comment()</code></li>
							<li>any nodes: <code>node()</code></li>
						</ul>
						<li>Processing instruction tests</li>
						<ul>
							<li>any PI: <code>processing-instruction()</code></li>
							<li>specific PI: <code>processing-instruction("xml-stylesheet")</code></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xpath-axes">
				<title short="Axes">XPath Axes</title>
				<slide>
					<title>Where Do You Want to Go Today?</title>
					<ul>
						<li>File system paths are one direction only</li>
						<ul>
							<li>always one level down in the file system hierarchy</li>
							<li><code>.</code> and <code>..</code> are clever directory shortcuts</li>
							<li>other directions supported by tools (e.g., <code>find</code>)</li>
						</ul>
						<li>XPath allows steps in different directions</li>
						<ul>
							<li>the default direction is <code>child</code></li>
							<li>other directions are explicitly specified: <code>descendant::a</code></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Ancestor Axis</title>
					<img style="height : 75%" src="axis_ancestor.gif"/>
				</slide>
				<slide>
					<title>Ancestor-or-self Axis</title>
					<img style="height : 75%" src="axis_ancestororself.gif"/>
				</slide>
				<slide>
					<title>Attribute Axis</title>
					<ul>
						<li>Attributes are <u>not</u> the children of elements, but ...</li>
						<li>... elements are their attributes' parent!</li>
						<ul>
							<li>very counter-intuitive</li>
							<li>very convenient</li>
						</ul>
						<li>Attributes are always leaves in the node tree</li>
						<li>Attribute Nodes <u>have</u> the attribute value as their value</li>
					</ul>
				</slide>
				<slide>
					<title>Child Axis</title>
					<img style="height : 75%" src="axis_child.gif"/>
				</slide>
				<slide>
					<title>Descendant Axis</title>
					<img style="height : 75%" src="axis_descendant.gif"/>
				</slide>
				<slide>
					<title>Descendant-or-self Axis</title>
					<img style="height : 75%" src="axis_descendantorself.gif"/>
				</slide>
				<slide>
					<title>Following Axis</title>
					<img style="height : 75%" src="axis_following.gif"/>
				</slide>
				<slide>
					<title>Following-sibling Axis</title>
					<img style="height : 75%" src="axis_followingsibling.gif"/>
				</slide>
				<slide>
					<title>Namespace Axis</title>
					<ul>
						<li>Namespace nodes are <u>not</u> the children of elements, but ...</li>
						<li>... elements are their namespaces' parent!</li>
						<ul>
							<li>very counter-intuitive</li>
							<li>very convenient</li>
						</ul>
						<li>Namespace nodes are always leaves in the node tree</li>
						<li>Namespace nodes <u>have</u> the namespace name (i.e., a URI) as their value</li>
						<li>Namespace nodes exist because of namespace declarations</li>
						<ul>
							<li>in the XPath node tree, only the namespace nodes are visible</li>
							<li>the namespace declaration attributes (<code>xmlns</code>) are invisible</li>
							<li>one namespace declaration potentially creates many namespace nodes</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Parent Axis</title>
					<img style="height : 75%" src="axis_parent.gif"/>
				</slide>
				<slide>
					<title>Preceding Axis</title>
					<img style="height : 75%" src="axis_preceding.gif"/>
				</slide>
				<slide>
					<title>Preceding-sibling Axis</title>
					<img style="height : 75%" src="axis_precedingsibling.gif"/>
				</slide>
				<slide>
					<title>Self Axis</title>
					<img style="height : 75%" src="axis_self.gif"/>
				</slide>
				<slide>
					<title>Putting it all Together</title>
					<ul>
						<li>XPath location paths use a simple syntax</li>
						<ul>
							<li>sequence of location steps, separated by <q><code>/</code></q></li>
						</ul>
						<li>Each location step uses a simple structure (<code>preceding::p[@class="warning"]</code>)</li>
						<ol>
							<li>an axis followed by <q><code>::</code></q> (no axis uses the default axis <code>child</code>)</li>
							<li>a <link href="xpath-nodetest">node test</link></li>
							<li><em>0-n</em> <link href="xpath-predicates"/> enclosed in <q><code>[]</code></q></li>
						</ol>
						<li>Location paths can be abbreviated</li>
						<ul>
							<li><code>child::</code> can be omitted (default axis)</li>
							<li><code>attribute::</code> can be written as <q><code>@</code></q></li>
							<li><q><code>.</code></q> is an abbreviation for <code>self::node()</code></li>
							<li><q><code>..</code></q> is an abbreviation for <code>parent::node()</code></li>
							<li><q><code>//</code></q> is an abbreviation for <code>/descendant-or-self::node()/</code></li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xpath-predicates">
				<title>Predicates</title>
				<slide>
					<title>Location Step Filters</title>
					<ul>
						<li>Predicates are filters for each location step</li>
						<ul>
							<li>there can be any number of filters (<em>0-n</em>)</li>
							<li>each filter is applied to each selected node individually</li>
						</ul>
						<li>Each predicate is an XPath and evaluated as a boolean</li>					
						<ul>
							<li>the context of this evaluation is the node for which the filter is evaluated</li>
							<li>if the result is a number, it is compared with the <code>position()</code> function (<code>/descendant::a[5]</code>)</li>
						</ul>
						<li>Predicates always reduce the set of selected nodes</li>
						<ul>
							<li>as a corner case, the set of selected nodes does not change</li>
							<li>predicates are used in the majority of non-trivial XPath location paths</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Location Path Processing</title>
					<ul>
						<li>Location paths are processed in a very simple way</li>
						<ol>
							<li>start with a given context</li>
							<li>for each location step, repeat the following steps:</li>
							<li>based on the context and the axis, select the nodes on this axis</li>
							<li>reduce this selection to the nodes identified by the node test</li>
							<li>sequentially apply all filters to each of these nodes</li>
							<li>take the remaining node set as the context for the next location step</li>
						</ol>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xpath-expressions">
			<title>XPath Expressions</title>
			<slide>
				<title>Beyond Location Paths</title>
				<ul>
					<li>XPath is a full expression language</li>
					<ul>
						<li>any evaluated expression in XSLT is an XPath</li>
						<li>XPath must be able to operate on non-XML data types</li>
					</ul>
					<li>XPath uses a very simple data model</li>
					<ol>
						<li>node sets: <code>//img[not(@alt)]</code></li>
						<li>number: <code>count(//img)</code></li>
						<li>string: <code>/descendant::img[3]/@src</code></li>
						<li>boolean: <code>starts-with(/html/@lang, 'en')</code></li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>XPath Usages</title>
				<ul>
					<li>XPath is used in different technologies</li>
					<ul>
						<li>XSLT uses XPath as its expression language</li>
						<li>XML Schema uses XPath for selecting identity constraint nodes</li>
						<li>DOM uses XPath as a way to select DOM nodes</li>
					</ul>
					<li>Depending on the environment, expressions must yield certain results</li>
					<ul>
						<li>for conditionals, a boolean must be returned</li>
						<li>iterations (in XSLT) only loop over nodes</li>
						<li>when printing out text, a string must be produced</li>
					</ul>
					<li>XPath has built-in rules for casting types</li>
					<ul>
						<li>node set → boolean: empty is false, non-empty is true</li>
						<li>node → string: take the <em>string value</em> (i.e., concatenate all text node descendants)</li>
						<li>string → number: interpret as decimal notation (otherwise return <q><code>NaN</code></q>)</li>
						<li>XPaths often return surprising results (<code>//a[starts-with(@href, https)]</code>)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xpath-functions">
			<title>XPath Functions</title>
			<slide>
				<title>Function Library</title>
				<ul>
					<li>XPath has a small library of built-in functions</li>
					<ul>
						<li>useful for basic XPath-level functions</li>
						<li>other specs are allowed to extend it (XSLT does it)</li>
					</ul>
					<li>XPath functions return results of various data types</li>
					<ul>
						<li>boolean: <code>boolean, contains, false, lang, not, starts-with, true</code></li>
						<li>number: <code>ceiling, count, floor, last, number, position, round, string-length, sum</code></li>
						<li>string: <code>concat, local-name, name, namespace-uri, normalize-space, string, substring, substring-after, substring-before, translate</code></li>
						<li>node set: <code>id</code></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Using Functions</title>
				<ul>
					<li>Functions and location paths are orthogonal</li>
					<ul>
						<li>each construct may be based on the other</li>
						<li>it is possible to nest them arbitrarily</li>
						<li>predicates often contain functions</li>
						<li><code>//a[substring(@href,string-length(@href)-2)='pdf']</code></li>
					</ul>
					<li>XPaths can become powerful and complex</li>
					<ul>
						<li>writing some code or thinking about an XPath?</li>
						<li>XPaths are more declarative</li>
						<li>they may be more robust against changes in the XML schema</li>
						<li>they can be optimized by a smart XPath implementation</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Limitations of XPath</title>
			<slide>
				<title>XPath Selects</title>
				<ul>
					<li>Query languages select and recombine</li>
					<ol>
						<li>look up all addresses by zip code</li>
						<li>for each zip code, count the number of addresses</li>
					</ol>
					<li>XSLT fills in the missing parts (as a programming language)</li>
					<ul>
						<li>XSLT can construct XML and re-apply XPath</li>
					</ul>
					<li>XQuery fills in the missing parts (query-wise)</li>
					<ul>
						<li>80% of XQuery is XPath (in version 2.0, though)</li>
						<li>the remaining 20% are bindings, constructors, and glue</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XPath is Important</title>
				<ul>
					<li>XPath is a basic tool of the XML toolbox</li>
					<li>XPath is reused in various XML technologies</li>
					<li>XPath selects parts of an XML document</li>
					<li>XPath can do more general things by using expressions</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt1" cover="slidycover">
		<title short="XSLT 1">XML Transformations (XSLT) — Part I</title>
		<date short="2006-09-21">Thursday, September 21, 2006</date>
		<toc id="reading"/>
		<toc id="resources"/>
		<toc id="abstract">Because XML can be used to represent any vocabulary (often defined by some schema), the question is how these different vocabularies can be processed and maybe transformed into something else. This <q>something else</q> may be another XML vocabulary (a common requirement in B2B scenarios), or it may be HTML (a common scenario for Web publishing). Using <em>XSL Transformations (XSLT)</em>, mapping tasks can be implemented easily. XSLT leverages XPath's expressive power in a rather simple programming language. For easy tasks, XSLT mapping can be specified without much real <q>programming</q> going on, by simply specifying how components of the source markup are mapped to components of the target markup.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XPath and XSLT</title>
			<ul>
				<li>XPath is an expression language</li>
				<ul>
					<li>location paths let you select part of an XML document tree</li>
					<li>expressions in general may return other data types as well (string, number, boolean)</li>
				</ul>
				<li>XSLT is a programming language based on XPath</li>
				<ul>
					<li>XSLT defines the structures for the control flow within the program</li>
					<li>in all the places where something is evaluated, XPaths are being used</li>
					<li>sometimes, one can substitute for the other</li>
				</ul>
			</ul>
			<listing src="xslt-vs-xpath.xsl" line="5-13"/>
		</slide>
		<slide>
			<title>XSLT Executive Summary</title>
			<ul>
				<li>XSLT is an XML-oriented programming language</li>
				<li>XSLT uses XML as its syntax</li>
				<li>XSLT is a weakly typed language</li>
				<li>XSLT is not designed for large programming tasks</li>
				<li>XSLT is the standard language for XML-to-XML transformations</li>
				<li>XSLT is very simple and often too simple</li>
				<li>XSLT 2.0 is much more complex and powerful</li>
			</ul>
		</slide>
		<slide>
			<title>XSLT as a Programming Language</title>
			<ul>
				<li>XSLT is a functional programming language</li>
				<ul>
					<li>fundamentally different from the usual languages</li>
					<li>not important for very simple mapping applications</li>
					<li>important for writing more complex transformations</li>
					<li>hard to get used to for procedurally trained people</li>
				</ul>
				<li>XSLT has built-in behavior for tree traversal</li>
				<ul>
					<li>XPath allows you to select parts of the document tree</li>
					<li>XSLT's default behavior is to traverse the complete tree</li>
					<li>the idea of <q>default behavior</q> may seem strange</li>
				</ul>
			</ul>
		</slide>
		<part id="xslt-examples">
			<title>Simple Examples</title>
			<slide>
				<title>My First XSLT</title>
				<ul>
					<li>XSLT uses a simple environment</li>
					<ul>
						<li>all you need is an <em>XSLT processor</em> (<a href="http://saxon.sourceforge.net/">Saxon</a> recommended)</li>
					</ul>
					<li>Some interesting observations</li>
					<ul>
						<li>it is an XML document (using the <a href="http://www.w3.org/TR/xslt#xslt-namespace">XSLT Namespace</a>)</li>
						<li>it contains no visible code (no statements)</li>
						<li>when being applied (i.e., executed), it produces a result</li>
					</ul>
				</ul>
				<listing src="first.xsl"/>
			</slide>
			<slide>
				<title>Why does it Work?</title>
				<ul>
					<li>The <q>text</q> of the document is produced</li>
					<ul>
						<li>technically, it is the concatenation of all text nodes</li>
						<li>this works with all XML input documents</li>
					</ul>
					<li>XSLT by default traverses the document tree</li>
					<ul>
						<li>it copies all text nodes</li>
						<li>it works its way through the document recursively</li>
						<li>this behavior is unusual for a programming language</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Second XSLT</title>
				<listing src="second.xsl"/>
			</slide>
			<slide>
				<title>How does it Work?</title>
				<ul>
					<li>Text output rather than XML output</li>
					<li>Overriding the default behavior</li>
					<ul>
						<li>new rules for how to recurse through the document tree</li>
						<li>the rules are <q>applied</q> <em>by the XSLT processor</em></li>
						<li>the execution of the XSLT code is controlled <em>by the XSLT processor</em></li>
					</ul>
					<li>Traversing the document tree in XSLT is easy</li>
					<ul>
						<li>this is what XSLT has been designed for</li>
						<li>trying to avoid this pattern leads to bad code and bad results</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Third XSLT</title>
				<listing src="third.xsl" line="3-21"/>
			</slide>
			<slide>
				<title>How Mappings Work</title>
				<ul>
					<li>All non-XSLT elements are <em>literal result elements</em></li>
					<ul>
						<li>their content is processed as usual</li>
						<li>they may contain XSLT or literal result elements</li>
					</ul>
					<li>XSLT elements in the stylesheet are instructions</li>
					<ul>
						<li>they are executed and have some predefined behavior</li>
						<li>if they produce results, these go to the result tree as well</li>
					</ul>
					<li>One-template XSLT is a good way to start with XSLT</li>
					<ul>
						<li>avoiding the learning curve associated with <link href="xslt-templates"/></li>
						<li>for easy mapping tasks, this pattern often is sufficient</li>
						<li>for complex tasks, this is the XSLT equivalent of <q>spaghetti code</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title><q>Hello World</q> in XSLT</title>
				<ul>
					<li>XSLT always transforms an XML document</li>
					<ul>
						<li>this is hard-coded in the <link href="xslt-model"/></li>
					</ul>
					<li>Simply generating output is impossible</li>
					<ul>
						<li><q>hello world</q> therefore ignores the input</li>
						<li>anything can be the input (including the XSLT itself)</li>
					</ul>
				</ul>
				<listing src="helloworld.xsl"/>
			</slide>
		</part>
		<part>
			<title>XSLT Instructions</title>
			<slide>
				<title>XSLT is RISC</title>
				<ul>
					<li>XSLT has a <a href="http://www.w3.org/TR/xslt#element-syntax-summary">small set of instructions</a></li>
					<ul>
						<li>the language was designed to run in a restricted environment</li>
						<li>the language was designed for a specific task</li>
						<li>much of the language's power lies in XPath</li>
					</ul>
					<li>XPath is the CISC part of XSLT</li>
					<ul>
						<li>XPath is a complex high-level language</li>
						<li>it is specialized for the task the language is designed to do</li>
						<li>it can be highly optimized</li>
						<li>writing the XPaths often is the most challenging part of XSLT</li>
					</ul>
					<li>Starting with XSLT should improve simple mappings</li>
				</ul>
			</slide>
			<slide id="xslt-iterations">
				<title>Iterations</title>
				<ul>
					<li>XSLT can only iterate over node sets</li>
					<ul>
						<li>any other problem has to be solved recursively</li>
						<li>iterating over node sets often is what you want to do</li>
					</ul>
					<li>Applying the same code to all of the nodes</li>
					<ul>
						<li>works great if all nodes require the same processing</li>
						<li>is of limited use when processing needs to be conditional</li>
					</ul>
				</ul>
				<listing src="third.xsl" line="11-17"/>
			</slide>
			<slide id="xslt-conditionals">
				<title>Conditional Instructions</title>
				<ul>
					<li>Programming languages usually provide if-then-else</li>
					<ul>
						<li>XSLT has an if-then: <xslte>if</xslte></li>
						<li>and an if-then-(elif-then)*-else: <xslte>choose</xslte></li>
					</ul>
					<li>Simple handling of special cases</li>
					<ul>
						<li>having few and reasonably sized conditionals is ok</li>
						<li>having deeply nested and very long conditionals is a problem</li>
						<li>as in all programming languages, the latter case should use other mechanisms</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>My Third XSLT (II)</title>
				<listing src="third-if.xsl" line="10-22"/>
			</slide>
			<slide>
				<title>My Third XSLT (III)</title>
				<listing src="third-choose.xsl" line="20-38"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSLT is Simple</title>
				<ul>
					<li>XSLT is a simple programming language</li>
					<li>XSLT's processing model is useful but unusual</li>
					<li>XPath competence is essential for XSLT</li>
					<li>Programming requires practice</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt2" cover="slidycover">
		<title short="XSLT 2">XML Transformations (XSLT) — Part II</title>
		<date short="2006-09-26">Tuesday, September 26, 2006</date>
		<toc id="reading"/>
		<toc id="resources"/>
		<toc id="abstract">XSLT processes documents by matching nodes in the document tree to <em>templates</em>, which then are executed to process these nodes. This process of matching and executing templates is the core of XSLT's processing model. XSLT has built-in templates which complement the user-supplied templates, so that the XSLT processor always finds a template to execute. Templates can conflict, and it is then necessary to resolve this conflict by finding the <q>best match</q> of all matching templates. This <em>conflict resolution</em> process also is a very important component of the XSLT processing model.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XSLT Programming</title>
			<ul>
				<li>Simple mappings can be defined in one template</li>
				<ul>
					<li>the template creates the result document's structure</li>
					<li><link href="xslt-iterations"/> and <link href="xslt-conditionals"/> provide some flexibility for processing</li>
					<li>the resulting code is always <q>spaghetti code</q></li>
				</ul>
				<li>Non-trivial XSLT programs use more than one template</li>
				<ul>
					<li>different templates are responsible for mapping subtrees of the input document</li>
					<li>the whole process is <em>driven by the document</em></li>
					<li>XSLT programming needs some time to get used to</li>
				</ul>
				<li>Like every tool, XSLT can be misused</li>
				<ul>
					<li>for simple problems, XSLT can be used like a regular programming language</li>
					<li>for harder problems, this is impossible (missing language constructs)</li>
				</ul>
			</ul>
		</slide>
		<part id="xslt-model">
			<title>XSLT Processing Model</title>
			<slide>
				<title>Input and Output</title>
				<img style="width : 90% ; margin : 2% ; " src="xslt-model.png"/>
			</slide>
			<part id="xslt-templates">
				<title>Templates</title>
				<slide>
					<title>Templates as Building Blocks</title>
					<ul>
						<li>Templates are the main unit of code</li>
						<ul>
							<li>the <xslta>match</xslta> attribute defines which nodes are processed by a template</li>
							<li>whenever such a node needs to be processed, the template is executed (<q>applied</q>)</li>
							<li>XPaths are interpreted with the matched node as context</li>
						</ul>
						<li>Templates contain a mix of <link href="xslt-literal"/> and XSLT code</li>
						<ul>
							<li><link href="xslt-literal"/> and text nodes are copied to the result tree</li>
							<li>XSLT elements are executed (depending on their semantics)</li>
							<li><xslte>apply-templates</xslte> plays a special role because it selects nodes to be processed</li>
						</ul>
						<li>The template application process is special</li>
						<ul>
							<li>probably the most challenging aspect when learning the language</li>
							<li>XSLT is much easier to use when understanding the underlying principle</li>
						</ul>
					</ul>
				</slide>
				<slide id="xslt-algorithm">
					<title>Basic Mechanics</title>
					<ol>
						<li>The <em>source node list</em> contains only the root node</li>
						<li>The result tree is created by inserting the result from processing a node from the source node list</li>
						<li>Processing typically puts more nodes on the source node list</li>
						<li>The process is repeated until the source node list is empty</li>
					</ol>
					<listing src="second.xsl" line="5-15"/>
				</slide>
				<slide>
					<title>Template Selection</title>
					<ul>
						<li>Templates are connected through two statements</li>
						<ul>
							<li><xslte>apply-templates</xslte> selects which nodes are put on the source node list</li>
							<li>the XSLT processor selects the best <xslte>template</xslte> and executes it</li>
						</ul>
						<li>What happens if there is no template?</li>
						<ul>
							<li>templates use <link href="xslt-pattern"/> to specify their applicability</li>
							<li>users may not specify a template for a node they select</li>
							<li>instead of an error, <link href="xslt-builtin"/> are used to handle this situation</li>
						</ul>
					</ul>
				</slide>
				<slide id="xslt-pattern">
					<title>Patterns</title>
					<ul>
						<li>Patterns are a subset of XPath</li>
						<ul>
							<li>they are used to specify to which nodes certain language constructs apply</li>
							<li>patterns specify a set of conditions on a node</li>
						</ul>
						<li>The specification is short, but hard to understand</li>
						<ul>
							<li><a href="http://www.w3.org/TR/xslt#patterns"><q>A node matches a pattern if the node is a member of the result of evaluating the pattern as an expression with respect to some possible context; the possible contexts are those whose context node is the node being matched or one of its ancestors.</q></a></li>
						</ul>
						<li>Practically, patterns are node tests, node contexts, and predicates</li>
						<ul>
							<li><q><code>*</code></q> matches any element</li>
							<li><q><code>tr</code></q> matches <elem>tr</elem> elements</li>
							<li><q><code>thead/tr</code></q> matches <elem>tr</elem> elements within <elem>thead</elem> elements</li>
							<li><q><code>p[@class='warning']</code></q> matches <elem>p</elem> elements with their <xml>class</xml> set to <code>warning</code></li>
							<li>these mechanisms can be combined (and connected by the union operator <q><code>|</code></q>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Pattern-Based Processing</title>
					<listing src="people-patterns.xsl" line="10-29"/>
				</slide>
			</part>
			<part id="xslt-builtin">
				<title>Built-In Templates</title>
				<slide>
					<title>XSLT Default Behavior</title>
					<ul>
						<li>Built into every XSLT processor</li>
						<ul>
							<li>covering all seven XPath node types</li>
							<li>the XSLT processor always finds a template to process a node</li>
						</ul>
						<li>Conflicts are thus also built into the language</li>
						<ul>
							<li>every user template is in conflict with a built-in template</li>
							<li><link href="xslt-conflictresolution"/> is a core concept of XSLT</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Root and Elements</title>
					<ul>
						<li>The most important node types</li>
						<ul>
							<li>every XML document has a root and at least one element</li>
						</ul>
						<li>The default behavior traverses the tree recursively</li>
						<ul>
							<li>the recursion only selects child nodes (the default is <xml>select="node()"</xml>)</li>
							<li>attributes are <u>not</u> children of the element nodes!</li>
						</ul>
					</ul>
					<listing src="built-in.xsl" line="4-6"/>
					<listing src="first.xsl"/>
				</slide>
				<slide>
					<title>Text and Attributes</title>
					<ul>
						<li>These nodes create text output</li>
						<li>the processing does not continue with <xslte>apply-templates</xslte></li>
						<ul>
							<li>text and attribute nodes are always leaf nodes</li>
						</ul>
						<li>Attributes are not selected by the built-in rules</li>
						<ul>
							<li>they are only processed when selected by a user instruction</li>
						</ul>
					</ul>
					<listing src="built-in.xsl" line="12-14"/>
				</slide>
				<slide>
					<title>Processing Instructions and Comments</title>
					<ul>
						<li>These nodes are ignored</li>
						<li>Processing instructions and comments are selected by the built-in rules</li>
						<ul>
							<li>the built-in behavior can be overwritten if required</li>
						</ul>
					</ul>
					<listing src="built-in.xsl" line="16-16"/>
				</slide>
			</part>
			<part id="xslt-conflictresolution">
				<title>Conflict Resolution</title>
				<slide>
					<title>Template Selection</title>
					<ul>
						<li>XSLT processes <link href="xslt-algorithm">nodes on the source node list</link></li>
						<li>For processing each node, the <q>best</q> template must be found</li>
						<li>XSLT supports incremental development</li>
						<ul>
							<li>templates can be added for more specialized processing</li>
							<li>other code does not have to be changed at all</li>
							<li>the source node list provides support for this decoupling</li>
						</ul>
						<li>For simple cases, the default mechanism is sufficient</li>
						<ul>
							<li>advanced XSLT programming sometimes requires manual intervention</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Template Selection</title>
					<ol>
						<li>All templates with a <xslta>match</xslta> attribute</li>
						<ul>
							<li>this excludes <link href="xslt-named-templates"/></li>
						</ul>
						<li>All templates with the same <em>mode</em></li>
						<ul>
							<li>part of the <xslte>apply-templates</xslte> instruction selecting the node</li>
						</ul>
						<li>The <link href="xslt-pattern">Pattern</link> must match</li>
						<li>If more than one template matches, order by <em>import precedence</em></li>
						<ul>
							<li>the import tree of the stylesheet is considered (this includes the built-in rules)</li>
						</ul>
						<li>If more than one template matches, order by <em>priority</em></li>
						<ul>
							<li>this sorts rules according to the specificity</li>
						</ul>
						<li>Execute resulting rule</li>
						<ul>
							<li>if still more than one, signal error or execute last in stylesheet</li>
						</ul>
					</ol>
				</slide>
				<slide>
					<title>Import Precedence</title>
					<img style="margin : 4% ; width : 90% ; " src="xslt-import-precedence.png"/>
				</slide>
				<slide>
					<title>Priorities</title>
					<ul>
						<li>Template priorities are computed</li>
						<ul>
							<li>a very simple pattern-based process</li>
							<li>a higher value means it is a better match</li>
						</ul>
						<li>Five steps are used to compute the priority</li>
						<ol>
							<li>templates using the union operator are treated as if there were multiple templates</li>
							<li>QNames and processing instructions are assigned a priority of <code>0</code></li>
							<li>namespace-prefixed wildcards (<code>prefix:*</code>) are assigned a priority of <code>-0.25</code></li>
							<li>other node tests with axis specifiers are assigned a priority of <code>-0.5</code></li>
							<li>all other patterns are assigned a priority of <code>0.5</code></li>
						</ol>
					</ul>
				</slide>
				<slide>
					<title>Different Conflicts</title>
					<listing src="conflict-resolution.xsl"/>
					<listing src="conflict-resolution.xml"/>
				</slide>
				<slide>
					<title>Resolution Process</title>
					<table style="margin : 4% ; width : 90% ; " rules="groups">
						<colgroup span="1"/>
						<colgroup span="1"/>
						<colgroup span="5"/>
						<colgroup span="1"/>
						<thead>
							<tr>
								<th valign="bottom" rowspan="2">Pattern</th>
								<th valign="bottom" rowspan="2">Priority</th>
								<th colspan="5">Resolution Step</th>
								<th valign="bottom" rowspan="2">Manual<br/>Adjustment</th>
							</tr>
							<tr>
								<th>1</th>
								<th>2</th>
								<th>3</th>
								<th>4</th>
								<th>5</th>
							</tr>
						</thead>
						<tbody>
							<tr>
								<td align="right">Built-in: <q><code>text() | @*</code></q></td>
								<td/>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"></td>
								<td align="center"></td>
								<td align="center"></td>
								<td align="center"></td>
							</tr>
							<tr>
								<td align="right">Built-in: <q><code>* | /</code></q></td>
								<td/>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"></td>
								<td align="center"></td>
								<td align="center"></td>
							</tr>
							<tr>
								<td align="right"><q><code>*</code></q></td>
								<td align="center">-0.5</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"></td>
								<td align="center"></td>
							</tr>
							<tr>
								<td align="right"><q><code>a</code></q></td>
								<td align="center">0.0</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"></td>
								<td align="center"></td>
							</tr>
							<tr>
								<td align="right"><q><code>b/a</code></q></td>
								<td align="center">0.5</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"></td>
							</tr>
							<tr>
								<td align="right"><q><code>c/b/a</code></q></td>
								<td align="center">0.5</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center">✓</td>
								<td align="center"><code>priority="1"</code></td>
							</tr>
						</tbody>
					</table>
				</slide>
				<slide>
					<title>Adjusting Priorities</title>
					<ul>
						<li>Computed priorities always lie between <code>-0.5</code> and <code>0.5</code></li>
						<li>Non-trivial patterns almost always have the priority <code>0.5</code></li>
						<li>Priorities can be set explicitly</li>
						<ul>
							<li><xslte>template match="..." priority="1"</xslte></li>
						</ul>
						<li>Managing priority values is up to the programmer</li>
						<ul>
							<li>it is rarely necessary to manage a large set of competing priorities</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>How to Iterate</title>
			<slide>
				<title>Processing Nodes in XSLT</title>
				<ul>
					<li>XSLT supports two ways of processing nodes</li>
					<ul>
						<li><link href="xslt-iterations"/> loop over a set of selected nodes</li>
						<li><link href="xslt-templates"/> process nodes which have been put on the source node list</li>
					</ul>
					<li>Both mechanisms handle similar situations</li>
					<ul>
						<li>a set of nodes is selected and should be processed</li>
						<li>the code for processing has to be available in a code block</li>
						<li><link href="xslt-iterations"/> put this code in the <xslte>for-each</xslte> body</li>
						<li><link href="xslt-templates"/> put this code in a reusable building block</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Homogeneous Processing</title>
				<ul>
					<li><link href="xslt-iterations"/> may lead to less modular code</li>
					<li>If the code has to be reused, they may not be a good solution</li>
					<ul>
						<li><link href="xslt-named-templates"/> may provide some support for reuse</li>
					</ul>
					<li>The selected nodes should require similar processing</li>
					<ul>
						<li>otherwise, the iteration code will contain many conditional statements</li>
					</ul>
					<li>Iterations should be restricted to small units of code</li>
				</ul>
			</slide>
			<slide>
				<title>Heterogeneous Processing</title>
				<ul>
					<li>If the node processing is very different, templates are better</li>
					<ul>
						<li>different templates are written for all nodes being selected</li>
						<li>no conditional code has to be written, selection is done by matching nodes to template patterns</li>
					</ul>
					<li>Templates can be reused</li>
					<ul>
						<li>the nodes appear in different locations and should be processed consistently</li>
						<li>the matching mechanism provides the ideal support for this scenario</li>
					</ul>
					<li>Extensible code should always use templates</li>
					<ul>
						<li>other stylesheets can import an existing stylesheet</li>
						<li>by selectively <q>overwriting</q> templates, the behavior can be customized</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Calling Templates</title>
			<slide>
				<title>Executing Templates</title>
				<ul>
					<li><link href="xslt-templates"/> usually have a <xml>match</xml> attribute</li>
					<ul>
						<li>these templates are part of XSLT's special pattern matching processing</li>
					</ul>
					<li>Templates may also be named units of code</li>
					<ul>
						<li>there is nothing special about these templates</li>
						<li>they are being called using a name like regular procedures</li>
					</ul>
				</ul>
			</slide>
			<slide id="xslt-named-templates">
				<title>Named Templates</title>
				<ul>
					<li><xslte>template</xslte> may also carry a <xml>name</xml> attribute</li>
					<li>Named templates have none of the special properties of XSLT template matching</li>
					<ul>
						<li>they are called by their name just like regular procedures</li>
						<li>they do not change the context of XPath evaluation</li>
					</ul>
					<li>Named templates are useful for modularizing code which is not tied to node types</li>
					<ul>
						<li>in most cases, they are called using <link href="xslt-parameters"/></li>
						<li>a typical application is the implementation of a facility for printing messages</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Document-Driven Transformations</title>
				<ul>
					<li>XSLT often requires <em>document-driven</em> programming</li>
					<li>Imperative programmers are more used to controlling the program flow</li>
					<li>Document-driven processing is a powerful design principle</li>
					<li>Complex (highly variable) documents are much better handled by document-driven processing</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xslt3" cover="slidycover">
		<title short="XSLT 3">XML Transformations (XSLT) — Part III</title>
		<date short="2006-09-28">Thursday, September 28, 2006</date>
		<toc id="reading"><a href="http://www-128.ibm.com/developerworks/xml/library/x-tipxsltrun/">XSLT Parameters</a></toc>
		<toc id="resources"/>
		<toc id="abstract">Advanced XSLT processing includes better control of the input and output documents, which can be finely controlled in terms of how whitespace is treated. Another interesting feature of XSLT is <em>keys</em>, which allow shorthand notations for frequently used access paths to nodes, and provide XSLT processors with more information for performance optimizations. Instructions for creating all possible kinds of nodes in the output tree make it possible to write code which generates element or attribute names based on runtime evaluations.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XSLT Core Concepts</title>
			<ul>
				<li>XSLT can be used for very simple matching tasks</li>
				<ul>
					<li>a mostly static result tree can be produced</li>
					<li>XPaths can be used to fill in parts of the result tree</li>
					<li><link href="xslt-iterations"/> and <link href="xslt-conditionals"/> provide some flexibility for processing</li>
				</ul>
				<li>More complex transformations require a different approach</li>
				<ul>
					<li>instead of static structures, nodes are individually mapped to small structures</li>
					<li>these structure fragments together produce the result tree</li>
					<li>the process is <em>document-driven</em> and based on the <link href="xslt-model"/></li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Variables and Parameters</title>
			<slide>
				<title>Programming Language Basics</title>
				<ul>
					<li>Variables in programming languages have different purposes</li>
					<ol>
						<li>defining a <em>name</em> for something so that it can be referred to</li>
						<li>associating this <em>name</em> with a <em>value</em> so that the value can be used</li>
						<li>providing a way to <em>update</em> the variable so that its value changes</li>
					</ol>
					<li>Variables in functional languages cannot change</li>
					<ul>
						<li>they are <em>immutable</em> (often called <em>constants</em> in other languages)</li>
						<li>more specifically, they are <em>dynamic constants</em> (i.e., can be computed at runtime)</li>
						<li>they are defined by giving them a <xpath>name</xpath> and referring to them by <xpath>$name</xpath></li>
					</ul>
					<li>Variables in XSLT have no type (no static type checking possible)</li>
					<ul>
						<li>the value that they have is typed</li>
						<li>but a variable may have values of any type</li>
					</ul>
				</ul>
				<pre><![CDATA[<xsl:variable name="sum" select="$op1 + $op2"/>
<xsl:variable name="result" select="$sum * $factor"/>]]></pre>
			</slide>
			<part id="xslt-variables">
				<title>Variables</title>
				<slide>
					<title>Why Variables?</title>
					<ul>
						<li>Reuse of values in different locations</li>
						<ul>
							<li>texts required for the transformation</li>
							<li>facilitates better separation of structure and content</li>
						</ul>
						<pre><![CDATA[<xsl:value-of select="$email-prefix"/> <!-- $email-prefix = 'You have ' -->
<xsl:value-of select="count(//message)"/>
<xsl:value-of select="$email-suffix"/> <!-- $email-suffix = ' e-mail messages.' -->]]></pre>
						<li>Using the correct context is essential</li>
						<ul>
							<li>variables cannot be updated</li>
							<li>if they need to be <q>updated</q>, they have to be re-created</li>
						</ul>
						<li>Why are they called <q>variables</q> if they are constants?</li>
						<ul>
							<li>their value varies in different invocations of the context</li>
							<li>they are computed at runtime (dynamic constants) rather than statically</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Scope and Extent</title>
					<ul>
						<li>Variables can be global or local</li>
						<ul>
							<li>global variables are visible in all templates</li>
							<li>local variables are visible in their context (i.e., at <xpath>following-sibling::*/descendant-or-self::*</xpath>)</li>
							<li>local variables are allowed to <em>shadow</em> global (not local) variables</li>
						</ul>
						<li>Variable values may be assigned using the <xslta>select</xslta> attribute</li>
						<ul>
							<li>The XPath's result is the value of the variable</li>
						</ul>
						<li>Variables can contain arbitrary XSLT code</li>
						<ul>
							<li>the code is executed in the same way as when constructing the result tree </li>
							<li>the <em>result tree fragment</em> is the value of the variable</li>
							<li>it can be used as a string (<xslte>value-of</xslte>) or as a tree (<xslte>copy-of</xslte>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Using Variables</title>
					<listing src="variable-assignment-wrong.xsl" line="4-15"/>
					<listing src="variable-assignment.xsl" line="4-17"/>
				</slide>
			</part>
			<part id="xslt-parameters">
				<title>Parameters</title>
				<slide>
					<title>Parameters vs. Variables</title>
					<ul>
						<li>Parameters are variables with additional semantics</li>
						<ul>
							<li>they are passed to their scope from the outside</li>
							<li>they are available within the scope like a variable (scopes are stylesheets and templates)</li>
							<li>like variables, they cannot be updated (and only global parameters can be shadowed)</li>
						</ul>
						<li>XSLT does not check proper parameter passing</li>
						<ul>
							<li>if a declared parameter is not passed, it gets a default value (specified or <xpath>''</xpath>)</li>
							<li>if a passed parameter is not declared, it is ignored</li>
							<li>like variables, parameters have no type (any value can be passed)</li>
							<li>XSLT's robustness makes it hard to spot programming errors</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Stylesheet Parameters</title>
					<ul>
						<li>Passed to the stylesheet when calling the stylesheet</li>
						<ul>
							<li>the exact way of specifying the parameters depends on the processor and the environment</li>
							<li>the passed values are available in the same way as global variables</li>
							<li>parameter checking has to be done by hand</li>
						</ul>
					</ul>
					<listing src="parameter-test.xsl" line="4-13"/>
				</slide>
				<slide>
					<title>Template Parameters</title>
					<ul>
						<li>Parameters can be passed to templates</li>
						<ul>
							<li>works with <xslte>apply-templates</xslte> and <xslte>call-template</xslte></li>
							<li><xslt>with-param</xslt> elements list the passed parameters</li>
							<li>parameter matching is done by name (there is no particular order to parameters)</li>
						</ul>
						<li>Templates can be programmed as parametrized components</li>
						<ul>
							<li>checking the signature has to be done by hand</li>
							<li><xslt>with-param</xslt> elements list the passed parameters</li>
							<li>parameter matching is done by name (there is no particular order to parameters)</li>
						</ul>
						<li>Parametrized template calls need a lot of markup</li>
						<ul>
							<li>XSLT's XML syntax makes the code hard to read</li>
						</ul>
					</ul>
					<pre>main param start = 1 ; param count = 10 ; {
	loop (0) };
loop param counter ; {
	print $start + $counter ;
	if ( $counter &lt; $count - 1) then 
		loop ($counter + 1) ; }</pre>
				</slide>
				<slide>
					<title>Parameter Passing</title>
					<listing src="parameters.xsl" line="4-21"/>
				</slide>
				<slide>
					<title>Message Facility</title>
					<listing src="message.xsl"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Controlling Documents</title>
			<slide>
				<title>XSLT Processing Model</title>
				<ul>
					<li>XSLT was built as a client-side language</li>
					<ul>
						<li>the browser has an XML document</li>
						<li>the XSLT is used to transform this XML</li>
						<li>the result is used for rendering the formatted document</li>
					</ul>
					<li>XSLT provides facilities for accessing additional documents</li>
					<ul>
						<li>an additional XML might contain localized texts for rendering</li>
						<li>like everything in XSLT, identification uses URIs</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Input Documents</title>
				<slide>
					<title>Opening Documents</title>
					<ul>
						<li>Initially, XSLT starts with the XPath node tree of the main document</li>
						<ul>
							<li>this step is outside of the control of the XSLT programmer</li>
						</ul>
						<li>Additional documents can be accessed using <xpath>document()</xpath></li>
						<ul>
							<li>the function accepts URIs, which are interpreted relative to the stylesheet</li>
							<li>only XML documents can be used, they will be parsed into an XPath tree</li>
						</ul>
						<li>XSLT Processors are smart enough to cache documents</li>
						<ul>
							<li>re-opening the same document will not re-parse it</li>
						</ul>
					</ul>
					<listing src="document.xsl" />
				</slide>
				<slide>
					<title>Whitespace in Documents</title>
					<ul>
						<li>Documents often contain many irrelevant whitespace text nodes</li>
						<ul>
							<li>many XML documents are pretty-printed for readability</li>
							<li>pretty-printing produces many line-feeds and tabs/spaces</li>
						</ul>
						<li>XSLT can be instructed to ignore whitespace nodes</li>
						<ul>
							<li><xslte>strip-space</xslte> lists all elements for which whitespace children should be ignored</li>
							<li>this may be a bit too much, because <link href="mixed-content"/> may contain significant whitespace</li>
						</ul>
						<pre><![CDATA[<p>do <u>not</u> <em>throw</em> <b>away</b> these whitespace nodes!</p>]]></pre>
						<li>XSLT can be instructed to preserve some whitespace nodes</li>
						<ul>
							<li><xslte>preserve-space</xslte> lists all elements for which whitespace children should be preserved</li>
							<li>usually, <xslte>preserve-space</xslte> lists the exceptions for <xslte>strip-space</xslte></li>
							<li>usually, <xslte>preserve-space</xslte> contains a list of all mixed content elements</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Controlling Whitespace</title>
					<listing src="strip-preserve.xsl" line="4-12"/>
				</slide>
			</part>
			<part>
				<title>Output Documents</title>
				<slide>
					<title>Serialization</title>
					<ul>
						<li>XSLT always produces a result tree</li>
						<ul>
							<li>stylesheet processing starts with an empty tree (root node only)</li>
							<li>XSLT code producing output then adds nodes to this tree</li>
							<li><xslte>text</xslte>, <xslte>value-of</xslte>, <xslte>copy-of</xslte>, <xslte>copy</xslte>, <xslte>element</xslte>, <xslte>attribute</xslte>, <xslte>comment</xslte>, <xslte>processing-instruction</xslte>, <link href="xslt-literal"/></li>
						</ul>
						<li>Serialization is the process of externalizing the final tree</li>
						<ul>
							<li><xslte>output</xslte> controls how the tree is serialized</li>
							<li><xml>xml</xml> writes the tree as an XML document</li>
							<li><xml>html</xml> writes the tree as an HTML document (<elem>img ...</elem> instead of <elem>img .../</elem>)</li>
							<li><xml>text</xml> writes the tree's <em>string value</em> (the concatenation of all text nodes)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Multiple Output Documents</title>
					<ul>
						<li>XSLT 1.0 does not support more than one output document</li>
						<ul>
							<li><xslte>message</xslte> is another output channel, but not a document</li>
							<li>this was one of the most requested features for language improvements</li>
						</ul>
						<li>How can stylesheets produce more than one document?</li>
						<ul>
							<li>XSLT 1.0 may produce one document which is then post-processed</li>
							<li>XSLT 2.0 offers language facilities for more than one output document</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Keys</title>
			<slide>
				<title>Document Access</title>
				<ul>
					<li>Some parts of documents may be accessed frequently</li>
					<ul>
						<li><xpath>//person[@ss = $ss]/name/surname</xpath> for getting a name by social security number</li>
						<li>costs depend on document size and access frequency</li>
						<li>the document structure has to be used in all places where the name is used</li>
					</ul>
					<li>Keys provide access to frequently used nodes</li>
					<ul>
						<li><xpath>key('ssKey', $ss)/name/surname</xpath> is based on a predefined access path (the key)</li>
						<li>very easy to optimize even for very simple XSLT processors</li>
						<li>easier to understand from the programmer's point of view</li>
					</ul>
					<li>For nested predicates, non-optimized evaluation is very expensive</li>
					<ul>
						<li><xpath>//reference[@crossref = //reference[@title = $title]/@name]</xpath></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Declaring and Using Keys</title>
				<ul>
					<li>
						<xslte>key</xslte> defines a key on the stylesheet's top level</li>
					<ul>
						<li><xslte>key name="ssKey" match="person" use="@ss"/</xslte></li>
						<li><xslta>name</xslta> is used for referring to the key (most people use <q><xml>...Key</xml></q>)</li>
						<li><xslta>match</xslta> selects all nodes which will be part of the key (i.e., accessible through it)</li>
						<li><xslta>use</xslta> selects the value(s) which will retrieve the nodes</li>
					</ul>
					<li>
						<xpath>key()</xpath> is used for retrieving nodes from a key</li>
					<ul>
						<li>the first argument specifies the name of the key (defined by <xslte>key name="..." ...</xslte>)</li>
						<li>the second argument specifies the value for which to look in that key</li>
						<li><xpath>key()</xpath> returns a node set (empty or any number of nodes)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML and XSLT for using a Key</title>
				<listing src="people.xml" line="2-12"/>
				<listing src="peoplekeys.xsl" line="4-6"/>
				<ul>
					<li>
						<xpath>key('preNameKey', 'Thomas')</xpath> ≡ <xpath>//name[pre = 'Thomas']</xpath>
					</li>
				</ul>
			</slide>
			<slide>
				<title>XSLT Key Structure</title>
				<table width="90%" cellpadding="10">
					<tr>
						<td>
							<table border="1" cellpadding="10">
								<tr>
									<th colspan="2">
										<xslt>preNameKey</xslt>
									</th>
								</tr>
								<tr>
									<th>Node</th>
									<th>Value</th>
								</tr>
								<tr>
									<td>[1] Erik Thomas Wilde</td>
									<td>Erik</td>
								</tr>
								<tr>
									<td>[1] Erik Thomas Wilde</td>
									<td>Thomas</td>
								</tr>
								<tr>
									<td>[2] Thomas Plagemann</td>
									<td>Thomas</td>
								</tr>
								<tr>
									<td>[3] Bob Glushko</td>
									<td>Bob</td>
								</tr>
							</table>
						</td>
						<td>
							<table border="1" cellpadding="10">
								<tr>
									<th colspan="2">
										<xslt>countryKey</xslt>
									</th>
								</tr>
								<tr>
									<th>Node</th>
									<th>Value</th>
								</tr>
								<tr>
									<td>[1a] Erik Thomas Wilde</td>
									<td>de</td>
								</tr>
								<tr>
									<td>[1b] iSchool/UCB</td>
									<td>us</td>
								</tr>
								<tr>
									<td>[2a] Thomas Plagemann</td>
									<td>de</td>
								</tr>
								<tr>
									<td>[2b] IFI/UIO</td>
									<td>no</td>
								</tr>
								<tr>
									<td>[3a] Bob Glushko</td>
									<td>us</td>
								</tr>
								<tr>
									<td>[3b] iSchool/UCB</td>
									<td>us</td>
								</tr>
							</table>						
						</td>
					</tr>
				</table>
			</slide>
			<slide>
				<title>Using Keys</title>
				<ul>
					<li>Finding nodes by intersecting <xpath>key()</xpath> results</li>
					<ul>
						<li><xpath>key()</xpath> always returns node sets</li>
						<li>interesting sets of nodes may be the intersection of several keys</li>
						<li>unfortunately, XPath does not provide an operator for set intersection</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Node Set Intersection</title>
				<p><xpath>$a[count(. | $b) = count($b)]</xpath>: Find all nodes in <code>$a</code> where the cardinality of <code>$b</code> does not change when adding this node to it. This means the node must be in <code>$b</code>, and it is in <code>$a</code> to start with.</p>
				<img src="xpath-intersection.png" style="width : 90% ; margin : 4% ; "/>
			</slide>
		</part>
		<part>
			<title>Generating Result Nodes</title>
			<slide id="xslt-literal">
				<title>Literal Result Elements</title>
				<ul>
					<li>Non-XSLT elements are copied to the result tree</li>
					<ul>
						<li>this is the most common way of producing nodes</li>
						<li>in this case, the nodes' names are hard-coded in the stylesheet</li>
					</ul>
					<li>Attributes are also copied to the result tree</li>
					<ul>
						<li>this means the attribute will always be there</li>
						<li>conditional creation of attributes needs other language constructs</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Producing Nodes Explicitly</title>
				<ul>
					<li>Element nodes can be produced by using <xslt>element</xslt></li>
					<ul>
						<li>the element <xslta>name</xslta> must be specified and can be computed</li>
						<li>additional instructions exist for all node types</li>
					</ul>
				</ul>
				<listing src="uppercaser.xsl" line="3-12"/>
			</slide>
		</part>
		<part>
			<title>Modularizing Stylesheets</title>
			<slide>
				<title>Including and Importing</title>
				<ul>
					<li>XSLT supports two ways of modularizing code</li>
					<ul>
						<li>including simply distributes code across multiple files</li>
						<li>importing creates a dependency and a hierarchy</li>
					</ul>
					<li><xslte>include</xslte> is mainly used for keeping files manageable</li>
					<ul>
						<li>it is used within managed projects</li>
					</ul>
					<li><xslte>import</xslte> is mainly used for reusing code from elsewhere</li>
					<ul>
						<li>it imports reused code and assigns this code a lower precedence</li>
						<li>local instructions can then overwrite (if required) some of the imported code</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Import Precedence</title>
				<img style="margin : 4% ; width : 90% ; " src="xslt-import-precedence.png"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XSLT in Practice</title>
				<ul>
					<li>XSLT is a simple programming language</li>
					<li>The processing model needs some time to get used to</li>
					<li>Sometimes the language is really too simple</li>
					<li>If you are really interested in XSLT, learn XSLT 2.0!</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsd1" cover="slidycover">
		<title short="XSD 1">XML Schema — Part I</title>
		<date short="2006-10-03">Tuesday, October 3, 2006</date>
		<toc id="reading">Chapters 4.3 &amp; 4.4 (pp. 132-159)</toc>
		<toc id="resources"><a href="xsd-quickref.pdf">XML Schema QuickRef</a></toc>
		<toc id="abstract">XML Schema is the most popular schema language for XML today. It has been introduced to overcome some of the commonly observed limitations of DTDs, most notably the lack of typing. <em>Simple Types</em> describe content which is not structured by XML markup, which means it describes attribute values and element content. Simple types can be defined by deriving new types from existing types by using type restriction. <em>Complex Types</em> describe element content if this content is using attributes and/or element content other than only character data. Using XML Schema's type concepts, it is easier to represent model-level information in a schema, because type hierarchies can represent model-level specializations.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>Bad Names</title>
			<blockquote>XML Schema is a language for describing an XML schema.<br/>An XML schema can be defined using XML Schema.<br/>I would like to use XML Schema for my XML schema.</blockquote>
			<ul>
				<li>The two most awkward name choices in the XML arena:</li>
				<ol>
					<li><em>XML Schema</em>, which is simply <u>an</u> XML schema language (among many others)</li>
					<li><em>Open XML</em>, which is simply an XML language for encoding office documents</li>
				</ol>
				<li>Naming things means <q>getting into people's heads</q></li>
				<ul>
					<li>pretentious and all-embracing name choices serve a certain purpose</li>
					<li>a name is just a name, it has no meaning</li>
					<li><em>XSD</em> and <em>WXS</em> are two semi-official acronyms for XML Schema</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>What's Wrong With DTDs?</title>
			<ul>
				<li>DTDs do not support application-level datatypes</li>
				<ul>
					<li>XML for B2B is very data-centric and needs typing</li>
					<li>SGML was created for documents where typing was less important</li>
				</ul>
				<li>DTDs do not support any relationships between markup constructs</li>
				<ul>
					<li>content models cannot be reused</li>
					<li>attribute lists cannot be reused</li>
					<li>structural relationships cannot be exploited in the DTD</li>
					<li><link href="param-entity"/> are used as a hack to work around this limitation</li>
				</ul>
				<li>DTD + XML Namespaces = Bad idea!</li>
			</ul>
		</slide>
		<slide>
			<title>Different Levels of Semantics</title>
			<ul>
				<li>XML Schema's simple datatypes provide some semantics</li>
				<ul>
					<li>a formerly undescribed attribute can now be described as being a <xml>xs:date</xml></li>
					<li>it can be understood as being a date and inserted into a calendar</li>
					<li>but what kind of date is it? a birthday? an order date? a shipping date?</li>
					<li>a question of the <em>context</em> of where the <xml>xs:date</xml> appears</li>
				</ul>
				<li>XML Schema better supports model-level information</li>
				<ul>
					<li>however, XML Schema also only captures part of the application semantics</li>
					<li>an XML Schema is usually better than a DTD, because it contains types</li>
					<li>types provide information about the basic datatypes being used</li>
					<li>additional semantics (e.g., different kinds of dates) must be documented elsewhere</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Schema-Validation and Applications</title>
			<img src="schema-valid-documents.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<slide>
			<title>Validation and Typing</title>
			<ul>
				<li>XML Schema does two things at the same time:</li>
			</ul>
			<ol>
				<li>Validation checks for structural integrity (is the document <em>schema-valid</em>?)</li>
				<ul>
					<li>checking elements and attributes for proper usage (as with DTDs)</li>
					<li>checking element contents and attribute values for proper values</li>
				</ul>
				<li>Type annotations make the types available to applications</li>
				<ul>
					<li>instead of having to look at the schema, applications get the <em>Post-Schema Validation Infoset (PSVI)</em></li>
					<li>type-based applications (such as XSLT 2.0) can work on the typed instance</li>
				</ul>
			</ol>
		</slide>
		<part id="xsd-types">
			<title>XML Schema Types</title>
			<slide>
				<title>What is a Type?</title>
				<ul>
					<li>A type is a <em>set of values</em></li>
					<ul>
						<li>the values can be enumerated (<em>home, mobile, office</em>)</li>
						<li>the values can be described by extension (intervals, regular expressions)</li>
					</ul>
					<li>DTDs have (almost) no types</li>
					<ul>
						<li>element content is always <xml>#PCDATA</xml> (any number of any characters)</li>
						<li>attributes most often are <xml>CDATA</xml> (any number of any characters)</li>
						<li>attributes may have enumerated types (but no extensional types)</li>
						<li>attributes may use <link href="ididref"/></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XML Schema vs. DTD</title>
				<div style="margin : 2% ; ">
					<table width="90%" cellspacing="20">
						<col/>
						<colgroup span="2"/>
						<thead>
							<tr>
								<td/>
								<th>DTD</th>
								<th>XML Schema</th>
							</tr>
						</thead>
						<tbody>
							<tr>
								<th>Concepts</th>
								<td colspan="2" align="center">some conceptual model (formal/informal)</td>
							</tr>
							<tr>
								<th>Types</th>
								<td style="color : gray ; "><xml>ID/IDREF</xml> and (<xml>#P</xml>)<xml>CDATA</xml></td>
								<td>Hierarchy of Simple and Complex Types</td>
							</tr>
							<tr>
								<th>Markup Constructs</th>
								<td>Element Type Declarations<br/><xml>&lt;!ELEMENT order ...</xml></td>
								<td>Element Definitions<br/><xml>&lt;xs:element name="order"> ...</xml></td>
							</tr>
							<tr>
								<th>Instances (Documents)</th>
								<td colspan="2" align="center"><xml>&lt;order date=""> [ order content ] &lt;/order></xml></td>
							</tr>
						</tbody>
					</table>
				</div>
			</slide>
			<slide>
				<title>Document/Data Perspectives</title>
				<ul>
					<li>XML as documents is text interspersed with structure</li>
					<ul>
						<li>XML captures text structures that support document processing</li>
						<li>without these structures, the text remains usable (as unstructured text)</li>
						<li>structure is good, but not indispensable</li>
					</ul>
					<li>XML as data is structure filled with data</li>
					<ul>
						<li>programmers think about classes and objects, so they need types</li>
						<li>without structure, data-centric XML is completely useless</li>
						<li>programmers often view XML as wire format and types as the portal to their objects</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xsd-simple-types">
			<title>Simple Types</title>
			<slide>
				<title>What are Simple Types?</title>
				<ul>
					<li>Simple types describe values not structured by XML markup</li>
					<ul>
						<li>they describe attribute values (<xml>date="2006-10-03"</xml>)</li>
						<li>they describe element content (<code>&lt;phone>+1-510-6432253&lt;/phone></code>)</li>
					</ul>
					<li>Simple types can be used for elements or attributes</li>
					<ul>
						<li>XML Schema treats contents in elements and attributes equally</li>
						<li>simple type libraries can be designed independent of their eventual use</li>
					</ul>
					<li>Simple types are available in three flavors</li>
					<ul>
						<li><em>atomic types:</em> one value of one type (one number in some range)</li>
						<li><em>union types:</em> one value of a union of types (a number or the string <q><code>undefined</code></q>)</li>
						<li><em>list types:</em> a whitespace-separated list of values (<elem>phone type="home office"</elem>)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Named vs. Anonymous</title>
				<ul>
					<li>Types can be <em>named</em> or <em>anonymous</em></li>
					<ul>
						<li>named types have a name and can be referenced (and thus be reused)</li>
						<li>anonymous types have no name and can only be used where they are defined</li>
					</ul>
				</ul>
				<listing src="named-anonymous-simple.xsd" line="3-9"/>
				<listing src="named-anonymous-simple.xsd" line="17-23"/>
			</slide>
			<slide>
				<title>Type Definitions</title>
				<ul>
					<li>Simple types are sets of values</li>
					<ul>
						<li>named simple types are sets of values with a name (and thus reusable)</li>
						<li>anonymous simple types are sets of values defined where they are needed</li>
					</ul>
					<li>Simple types are defined to represent model-level information</li>
					<ul>
						<li>in most cases, they will have restrictions associated with them</li>
						<li>they may also simply be tags for semantics (fax and phone numbers share the same value space)</li>
					</ul>
					<li>XML Schema has a library of <em>built-in datatypes</em></li>
					<ul>
						<li><em>ur-types</em> are the conceptual grounding of all types</li>
						<li><em>primitive types</em> are the types that are there <q>by definition</q></li>
						<li><em>derived types</em> are based on primitive types</li>
						<li>users can derive their own types using <em>simple type restriction</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Type Hierarchy</title>
				<img style="margin : 2% ; height : 75%" src="xsd-type-hierarchy.gif"/>
			</slide>
			<part>
				<title>Simple Type Restrictions</title>
				<slide>
					<title>Built-In Types</title>
					<listing src="built-in.xsd"/>
				</slide>
				<slide>
					<title>How to Restrict</title>
					<ul>
						<li>Simple types can be derived by restriction</li>
						<ul>
							<li>the <em>base type</em> must be a simple type</li>
							<li>the <em>derived type</em> will be a simple type</li>
							<li>all simple types form a tree, rooted at the <code>anySimpleType</code></li>
						</ul>
						<li>Restrictions are based on facets</li>
						<ul>
							<li>each restriction can use <em>0-n</em> facets</li>
							<li>facets can be refined in further simple type restrictions</li>
							<li>XML Schema designers should try to restrict types as much as possible</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Facets</title>
					<ul>
						<li>Facets define a certain way of restricting a simple type</li>
						<ul>
							<li>facets are independent, but they may interact (<code>minLength</code> and <code>maxLength</code>)</li>
							<li>XML Schema defines 12 <em>constraining facets</em> which may be used for restrictions</li>
							<li><code>length, minLength, maxLength, pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minExclusive, minInclusive, totalDigits, fractionDigits</code></li>
						</ul>
						<li>Facets may be repeated in different levels of the type hierarchy</li>
						<ul>
							<li>they may only further restrict the facet (e.g., reducing the <code>maxLength</code>)</li>
							<li>facets apply to all directly or indirectly derived subtypes</li>
							<li>facets may be <em>fixed</em> (no further restriction is allowed)</li>
						</ul>
						<li>Not all facets are applicable to all types</li>
						<ul>
							<li>the applicability depends on the <em>primitive type</em> being used</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Facet Applicability</title>
					<div style="margin : 2% ; ">
						<table width="90%">
							<tr>
								<th align="right" valign="top"><code>string</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>boolean</code></th>
								<td>pattern, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>float</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>double</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>decimal</code></th>
								<td>totalDigits, fractionDigits, pattern, whiteSpace, enumeration, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>duration</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>dateTime</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>time</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>date</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gYearMonth</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gYear</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gMonthDay</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gDay</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>gMonth</code></th>
								<td>pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minInclusive, minExclusive</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>hexBinary</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>base64Binary</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>anyURI</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>QName</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
							<tr>
								<th align="right" valign="top"><code>NOTATION</code></th>
								<td>length, minLength, maxLength, pattern, enumeration, whiteSpace</td>
							</tr>
						</table>
					</div>
				</slide>
				<slide>
					<title>Patterns</title>
					<ul>
						<li>Patterns restrict the <em>lexical space</em> of simple types</li>
						<ul>
							<li>most other facets restrict the <em>value space</em> (e.g., intervals of numbers)</li>
							<li>in many cases, patterns are useful additions to value-oriented facets</li>
						</ul>
						<li>Patterns are <a href="http://www.w3.org/TR/xmlschema-2/#regexs">regular expressions</a></li>
						<ul>
							<li>they support many common regex constructs and Unicode</li>
							<li>the language pattern allows <q><code>de</code></q>, <q><code>de-CH</code></q>, and other tags</li>
							<li>the pattern checks for lexical correctness, not against a code list</li>
						</ul>
					</ul>
					<pre>([a-zA-Z]{2}|[iI]-[a-zA-Z]+|[xX]-[a-zA-Z]{1,8})(-[a-zA-Z]{1,8})*</pre>
				</slide>
				<slide>
					<title>Simple Type Examples</title>
					<listing src="simple-examples.xsd"/>
				</slide>
				<slide>
					<title>Facet Limitations</title>
					<ul>
						<li>Facets limit one dimension of a type's value space</li>
						<ul>
							<li>using <code>pattern</code>, the lexical space can also be restricted</li>
							<li>restrictions should be made as specific as possible</li>
							<li>no limitations are possible beyond the predefined facets</li>
						</ul>
						<li>There is no connection to the context within the document</li>
						<ul>
							<li>facets cannot make references to other values (e.g., neighboring attributes)</li>
						</ul>
						<li>Additional constraints should be documented</li>
						<ul>
							<li>documentation enables applications to implement constraint checking</li>
							<li>other schema languages (like <link href="schematron"/>) may be used to express these constraints</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xsd-complex-types">
			<title>Complex Types</title>
			<slide>
				<title>What is a Complex Type?</title>
				<ul>
					<li>Complex types describe the allowed element content</li>
					<ul>
						<li>they describe what the element may contain (the element's <em>content model</em>)</li>
						<li>they describe the attributes that an element may have (the element's <em>attribute list</em>)</li>
					</ul>
					<li>Complex types do not define the element name</li>
					<ul>
						<li>the complex type defines which content is allowed for the element</li>
						<li>the element definition uses the complex type to define the allowed element content</li>
					</ul>
					<li>Complex types have similar properties to simple types</li>
					<ul>
						<li>they can be named or anonymous</li>
						<li><link href="xsd-complex-derivation"/> can be used to construct a type hierarchy</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Complex Type Example</title>
				<listing src="complex-example.xsd"/>
			</slide>
			<slide>
				<title>Complex Types &amp; Content Types</title>
				<ul>
					<li>Complex types can have different kinds of content</li>
					<ul>
						<li><em>simple content</em> refers to simple type content using additional attributes</li>
						<li><em>complex content</em> is anything else (anything beyond <em>simple type content</em>)</li>
					</ul>
					<li><link href="xsd-complex-derivation"/> heavily depends on this classification</li>
				</ul>
				<div style="margin : 2% ; ">
					<table width="90%" cellspacing="20" rules="all" frame="border">
						<tr>
							<th rowspan="3">Simple Types</th>
							<th colspan="4">Complex Types</th>
						</tr>
						<tr>
							<th rowspan="2">Simple Content</th>
							<th colspan="3">Complex Content</th>
						</tr>
						<tr>
							<td align="center">Element Only</td>
							<td align="center">Mixed</td>
							<td align="center">Empty</td>
						</tr>
					</table>
				</div>
			</slide>
			<part>
				<title>Content Models</title>
				<slide>
					<title>DTD Content Models</title>
					<ul>
						<li><link href="dtd-element"/> in DTDs uses a compact syntax</li>
						<ul>
							<li>XML Schema supports the same facilities with a more verbose syntax</li>
							<li>XML Schema adds features which DTDs do not support</li>
						</ul>
						<li>DTDs allow elements to be mandatory, optional, repeatable, or optional and repeatable</li>						
						<ul>
							<li>XML Schema allows the cardinality to be specified</li>
						</ul>
						<li>DTDs allow sequences (<q><code>,</code></q>) and alternatives (<q><code>|</code></q>)</li>
						<ul>
							<li>XML Schema introduces a (very limited) operator for <em>all groups</em></li>
						</ul>
						<li>Apart from the syntax, XML Schema content models are not very different</li>
					</ul>
				</slide>
				<slide>
					<title>Mixed Content</title>
					<ul>
						<li>DTDs define mixed content by mixing <code>#PCDATA</code> into the content model</li>
						<ul>
							<li>DTDs always require mixed content to use the form <code>( #PCDATA | a | b )*</code></li>
							<li>the occurrence of elements in mixed content cannot be controlled</li>
						</ul>
						<li>XML Schema defines mixed content outside of the content model</li>
						<ul>
							<li>the content model is defined like an element-only content model</li>
							<li>the <code>mixed</code> attribute on the type marks the type as being mixed</li>
						</ul>
						<li>XML Schema mixed content can use all model groups</li>
						<ul>
							<li>it is possible to constrain element occurrences in the same way as in element-only content</li>
							<li>in practice, this feature is rarely used (mixed content often is very loosely defined)</li>
						</ul>
					</ul>
					<listing src="global-local.xsd" line="3-11"/>
				</slide>
				<slide>
					<title>Empty Content</title>
					<ul>
						<li>DTDs have a special keyword for empty elements</li>
						<ul>
							<li>instead of the content model, the keyword <code>EMPTY</code> is used</li>
							<li>empty elements may still have attribute lists associated with them</li>
						</ul>
						<li>XML Schema empty types are defined implicitly</li>
						<ul>
							<li>there is no explicit keyword for defining an empty type</li>
							<li>if a type has no model group inside it, it is empty (it still may have attributes)</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Typed XML Structures</title>
				<ul>
					<li>XML Schema introduces a <q>type layer</q> to schema languages</li>
					<li>Types facilitate abstractions (and thus modeling)</li>
					<li>Simple types can be restricted to yield more specific types</li>
					<li>Complex types define how elements have to be used</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xsd2" cover="slidycover">
		<title short="XSD 2">XML Schema — Part II</title>
		<date short="2006-10-05">Thursday, October 5, 2006</date>
		<toc id="reading"><a href="http://www.awprofessional.com/articles/printerfriendly.asp?p=31477&amp;rl=1">XML Schema Identity Constraints</a></toc>
		<toc id="resources"/>
		<toc id="abstract">XML Schema allows greater flexibility in defining constraints on intra-document references than the ID/IDREF construct of DTDs. XML Schema's <em>Identity Constraints</em> are scoped, typed, and can be used for elements or attributes. The second aspect of XML Schema discussed today is the derivation of complex types. Complex types can be derived by <em>restriction</em> or extension. Complex type restriction defines the restricted type to be a more restricted version of the base type. Complex type extension makes it possible to extend the base type by either adding attributes or contents (only by appending new content to the content model).</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<part>
			<title>Local and Global Definitions</title>
			<slide>
				<title>Named and Anonymous Types</title>
				<ul>
					<li>Types can be named or anonymous</li>
					<ul>
						<li>named types can be reused (for elements, attributes, or type derivation)</li>
						<li>anonymous types can only be used where they are defined</li>
					</ul>
					<li>DTD <q>types</q> are always anonymous (they cannot be reused)</li>
				</ul>
				<pre>&lt;!ELEMENT person <span style="color : red ; ">(name, address) >
&lt;!ATTLIST person id ID #REQUIRED ></span></pre>
				<ul>
					<li>DTDs have everything hardcoded</li>
					<ul>
						<li>complex types are always locally defined</li>
						<li>elements are always globally defined</li>
						<li>attributes are always locally defined</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Elements</title>
				<slide>
					<title>Local vs. Global Elements</title>
					<ul>
						<li>Elements can be defined in a type or in the schema</li>
						<ul>
							<li>local elements can only be used where they are defined</li>
							<li>global elements can be reused, they can serve as building blocks</li>
						</ul>
						<li>Elements and complex types depend on each other</li>
						<ul>
							<li>an element is defined by a type, often this will be a complex type</li>
							<li>a complex type is defined by its contents, which are elements and/or attributes</li>
						</ul>
					</ul>
					<listing src="global-local.xsd" line="4-12"/>
				</slide>
				<slide>
					<title>Reusable Elements</title>
					<listing src="complex-example.xsd"/>
				</slide>
			</part>
			<part>
				<title>Attributes</title>
				<slide>
					<title>Attribute Definitions</title>
					<ul>
						<li>DTDs treat attributes as something entirely different from element content</li>
						<ul>
							<li>they are defined in an <code>ATTLIST</code>, not in the <code>ELEMENT</code> definition</li>
							<pre>&lt;!ELEMENT person (name, address) >
<span style="color : red ; ">&lt;!ATTLIST person id ID #REQUIRED ></span></pre>
							<li>they have a special range of <link href="dtd-attr-type"/> as opposed to elements</li>
							<pre>&lt;!ATTLIST person id <span style="color : red ; ">ID</span> #REQUIRED ></pre>
						</ul>
						<li>XML Schema overcomes these restrictions only partially</li>
						<ul>
							<li><link href="xsd-simple-types"/> are used to define attribute (or element) contents</li>
							<li>attributes are still described as something entirely different from an element's content model</li>
						</ul>
						<li>Attributes could be better integrated into the model</li>
						<ul>
							<li><link href="relax-ng"/> treats attributes as part of an element's content model</li>
							<li>this makes it trivial to have choices of element content and attributes</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Reusing Attributes</title>
					<ul>
						<li>DTDs treat attributes as something local to an element</li>
						<ul>
							<li>attributes are defined in an element's <code>ATTLIST</code></li>
							<li>reusing attributes for more than one element requires <link href="param-entity"/></li>
						</ul>
						<li>XML Schema better supports reuse of schema components</li>
						<ul>
							<li>types can be defined locally (anonymous) or globally (named)</li>
							<li>elements and attributes can be defined globally or locally</li>
						</ul>
						<li>Globally defined attributes can be reused</li>
						<ul>
							<li>the attribute definition does not tie it to any occurrence</li>
							<li>the attribute can then be referenced from a complex type definition</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Reusing Attributes (Example)</title>
					<listing src="global-local.xsd"/>
				</slide>
			</part>
		</part>
		<part id="xsd-names">
			<title>Names and Namespaces</title>
			<slide>
				<title>Definitions</title>
				<ul>
					<li>Many XML Schemas define a vocabulary for a namespace</li>
					<ul>
						<li>DTDs do not have any support for namespaces</li>
						<li>XML Schema heavily builds on <link href="xmlns"/></li>
					</ul>
					<li>XML Schema provides support for declaring a vocabulary's namespace</li>
					<pre>&lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" <span style="color : red ; ">targetNamespace="http://www.example.com/"</span>></pre>
					<li>Schema-validation can check for proper namespace usage</li>
					<ul>
						<li>the <xml>targetNamespace</xml> has to be used in the instance</li>
						<li>if the namespace does not match, validation cannot succeed</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Instances</title>
				<ul>
					<li>The schema defines the <xml>targetNamespace</xml> of the vocabulary</li>
					<ul>
						<li>all globally defined elements, attributes, and types are in that namespace</li>
						<li>the instances must declare and use the namespace to be schema-valid</li>
					</ul>
				</ul>
				<listing src="multicol.html" line="2-5"/>
				<ul>
					<li>A <em>prefixed name</em> is not the same as a <em>qualified name</em></li>
					<ul>
						<li>if there is a default namespace, unprefixed elements are still qualified</li>
					</ul>
					<li>Nasty details about XML Namespaces and attributes</li>
					<ul>
						<li>the default namespace does <em>not</em> apply to attributes</li>
						<li>attributes must therefore <em>always be prefixed</em> if they need to be qualified</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Name Qualification</title>
				<ul>
					<li>Global elements and attributes have to be used as qualified names</li>
					<ul>
						<li>this means that they must be referred to by their namespace-qualified name</li>
						<li>if a default namespace is used, elements are qualified <em>without carrying a prefix</em></li>
						<li>since the default namespace does not apply to attributes, they always must be explicitly prefixed</li>
					</ul>
					<li>Local elements and attributes may be used qualified or unqualified</li>
					<ul>
						<li>this control <em>only applies to locally defined elements or attributes</em></li>
						<li>the default defined by XML Schema is not a good choice</li>
						<li>because of how XML Namespaces work, a non-default choice is recommended</li>
					</ul>
					<li>XML Schema allows control over how local names have to be used</li>
				</ul>
				<pre>&lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://www.example.com/" <span style="color : red ; ">elementFormDefault="qualified" attributeFormDefault="unqualified"</span>></pre>
			</slide>
		</part>
		<part>
			<title>Identity Constraints</title>
			<slide>
				<title>Element = Type + Constraints</title>
				<ul>
					<li>DTDs and XML Schema are mainly about specifying grammars</li>
					<ul>
						<li>types describe the allowed values using grammars</li>
						<li>grammar-oriented schemas have some nice properties</li>
					</ul>
					<li>DTD's <link href="ididref"/> allow additional constraints</li>
					<ul>
						<li>apart from the grammar definition, cross-references in the tree are supported</li>
						<li>validation checks the integrity of the cross-references, not only the tree</li>
					</ul>
					<li>DTD's ID/IDREF are a very simple mechanism</li>
					<ul>
						<li>they are always global</li>
						<li>they also define the type of the attribute (XML names)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Improvements over ID/IDREF</title>
				<ul>
					<li>XML Schema's <em>Identity Constraints</em> improve DTD's ID/IDREF</li>
					<li>Identity constraints are scoped and apply only to a selected set of nodes</li>
					<ul>
						<li>the constraint applies only to a selected set of nodes (using XPath)</li>
					</ul>
					<li>Identity constraints are evaluated using typed values</li>
					<ul>
						<li><code>ID</code>s must be XML names (no numbers allowed)</li>
						<li><q><code>2</code></q> ≟ <q><code>+00002</code></q> should be evaluated based on the type (string or decimal?)</li>
						<li>XML Schema separates the constraint from the type of the selected nodes</li>
					</ul>
					<li>Identity constraints may select elements or attributes</li>
					<ul>
						<li>XPaths are used to select the constrained values, they can select elements or attributes</li>
					</ul>
					<li>Multiple fields</li>
					<ul>
						<li>it is possible to select more than one field for a constraint (phone &amp; area code must be unique)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Types of Identity Constraints</title>
				<ul>
					<li>Uniqueness constraints</li>
					<ul>
						<li>if there is a field, it must have a unique value among the selected nodes</li>
					</ul>
					<li>Key constraints</li>
					<ul>
						<li>there must be a field, and it must have a unique value among the selected nodes</li>
					</ul>
					<li>Key reference constraints</li>
					<ul>
						<li>the field must refer to an existing value in the referred key</li>
						<li>if the key reference also is constrained by a key, only one reference may use the referred key</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Identity Constraint Definitions</title>
				<ul>
					<li>Identity constraints are part of an element definition</li>
					<li>There are <em>three important factors</em> to an identity constraint</li>
					<ol>
						<li>location of the identity constraint's definition</li>
						<li>the nodes to which the constraint should be applied</li>
						<li>the fields which are used to evaluate the constraint</li>
					</ol>
					<li>If the constraint is a key reference constraint, there is a <em>fourth factor</em></li>
					<ol start="4">
						<li>the key constraint that is used for checking the references</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Identity Constraint Evaluation</title>
				<img style="width : 90% ; margin : 4% ; " src="identity-constraints.png"/>
			</slide>
			<slide>
				<title>Advanced Identity Constraints</title>
				<img style="width : 90% ; margin : 4% ; " src="identity-constraints++.png"/>
			</slide>
		</part>
		<part id="xsd-complex-derivation">
			<title>Complex Type Derivation</title>
			<slide>
				<title>Type Derivation</title>
				<ul>
					<li>XML Schema supports the modeling approach of <em>specialization</em></li>
					<ul>
						<li>simple types can be restricted to create more specialized simple types</li>
						<li>each value of a restricted type is also a valid value of the more general type</li>
					</ul>
					<li>Complex types are combinations of content and attributes</li>
					<li>Specialization of complex types can be done in two ways</li>
					<ul>
						<li><link href="xsd-complex-restriction"/>: more restricted ways of using the content and/or attributes</li>
						<li><link href="xsd-complex-extension"/>: additional content and/or attributes may be used</li>
					</ul>
					<li>Both kinds of complex type derivation can be regarded as specialization</li>
					<ul>
						<li><link href="xsd-complex-restriction"/>: for US persons the country must always be set to <q><code>US</code></q></li>
						<li><link href="xsd-complex-extension"/>: people having an employee number are employees</li>
					</ul>
				</ul>
			</slide>
			<part id="xsd-complex-restriction">
				<title>Complex Type Restriction</title>
				<slide>
					<title>Removing Choices</title>
					<ul>
						<li>Complex types usually allow variability</li>
						<ul>
							<li><xml>minOccurs</xml> and <xml>maxOccurs</xml> allow variability in occurrences</li>
							<li><xml>choice</xml> groups allow choosing between a number of alternatives</li>
							<li>attributes may be flagged as <code>use="optional"</code></li>
							<li>simple types allow the individual values to use certain sets of values</li>
						</ul>
						<li>Complex type restriction allows restrictions of all these variations</li>
						<ul>
							<li><xml>minOccurs</xml> and <xml>maxOccurs</xml> can be made more restrictive</li>
							<li>alternatives can be removed from choice groups</li>
							<li>optional attributes can be flagged as <code>use="required"</code> or <code>use="prohibited"</code></li>
							<li>the simple types of values can be set to more restricted simple types</li>
						</ul>
						<li>The technical way of defining restrictions is cumbersome</li>
						<ul>
							<li>when the base type changes, the restricted type has to be fixed by hand</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Complex Type Restriction (Example)</title>
					<listing src="complex-restriction.xsd"/>
				</slide>
				<slide>
					<title>Processing Restricted Complex Types</title>
					<ul>
						<li>Values of restricted types are values of the base types</li>
						<ul>
							<li>type restriction is defined so that restricted type values are always base type values</li>
							<li>code processing a type can be reused to process restricted types</li>
						</ul>
						<li>If there is a well-designed type hierarchy, programming becomes easier</li>
						<ul>
							<li>simple code can be written to handle the basic types</li>
							<li>if required, more advanced code can be written for the restricted types</li>
							<li>in many cases, restriction is more for validation than for processing</li>
						</ul>
						<li>XML Schemas may even use <em>abstract types</em></li>
						<ul>
							<li>no element will ever use the <xml>addressType</xml></li>
							<li>concrete elements will only use restricted types</li>
							<li>there can be code handling the <xml>addressType</xml> which handles all addresses</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="xsd-complex-extension">
				<title>Complex Type Extension</title>
				<slide>
					<title>Adding Content</title>
					<ul>
						<li>Complex types are element content and attributes</li>
						<ul>
							<li>extensions can add content, but only at the end of the base content</li>
							<li>extensions can add attributes (order is not significant for attributes)</li>
						</ul>
						<li>Adding content to existing content may not change the existing content</li>
						<ul>
							<li>if the content is <em>element only</em>, it has to remain element only</li>
							<li>if the content is <em>mixed</em>, it has to remain mixed</li>
							<li>if the content is <em>empty</em>, it may become element only or mixed</li>
							<li>the reason for these rules is that <em>mixed</em> is a global property of a type</li>
						</ul>
						<li>Adding attributes simply adds these to the list of existing attributes</li>
						<ul>
							<li>the added attributes may be optional or required</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Complex Type Extension (Example)</title>
					<listing src="complex-extension.xsd"/>
				</slide>
				<slide>
					<title>Processing Extended Complex Types</title>
					<ul>
						<li>Values of extended types are <em>not</em> values of the base types</li>
						<ul>
							<li>type extension adds content and/or attributes to a type</li>
							<li>if content is added, it is always added at the end of the base type's content</li>
						</ul>
						<li>If there is a well-designed type hierarchy, programming becomes easier</li>
						<ul>
							<li>simple code can be written to handle the basic types</li>
							<li>if that code should handle extended types, it must be written to handle extensions</li>
							<li><q>handling extensions</q> can be as simple as skipping them</li>
						</ul>
						<li>XML Schemas may even use <em>abstract types</em></li>
						<ul>
							<li>no element will ever use the <xml>addressType</xml></li>
							<li>concrete elements will only use extended types</li>
							<li>code handling extended types can build on code handling the base type</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Schema Components</title>
				<img style="margin : 2% ; height : 75%" src="xsd-components.gif"/>
			</slide>
			<slide>
				<title>XML Schema Features</title>
				<ul>
					<li>XML Schema allows defining a grammar for XML documents</li>
					<li>Types make it easier to turn a model into a grammar</li>
					<li>Identity constraints enable non-grammar constraints to be expressed</li>
					<li>Some of the things we have not seen:</li>
					<ul>
						<li>named groups, modularizing schemas, wildcards, substitution groups, ...</li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xmlmodeling" cover="slidycover">
		<author short="F. Michel">Felix Michel</author>
		<affiliation short="ETHZ">ETH Zürich, Switzerland</affiliation>
		<title short="Modeling">From Model to Markup</title>
		<date short="2006-10-10">Tuesday, October 10, 2006</date>
		<toc id="reading"/>
		<toc id="resources"/>
		<toc id="abstract">While XML is very useful for representing and manipulating structured data, the question remains where these structures come from. They are usually some kind of encoding for a conceptual model, but there is no established and universally accepted way of how to connect the modeling world with XML markup. Some of the challenges and approaches to XML and modeling will be presented in this lecture. The goal of this lecture is to raise awareness for the current gap between models and markup, and for practical approaches how to bridge that gap.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>About Me</title>
			<ul>
				<li>Felix Michel</li>
				<li>A Visiting Student Researcher from <a href="http://www.ethz.ch">ETHZ</a> doing his Master's Thesis @ UC</li>
				<li>My Thesis' <a href="http://dret.net/netdret/theses#model">subject</a> is <q>Visualization of XML models and their mapping to schema languages</q></li>
				<li>Erik Wilde is my advisor</li>
				<li>My english is not schema-valid, nor is my pronunciation</li>
				<pre>control@brain$&gt;: ./english-parser --validation=lax</pre>
			</ul>
		</slide>
		<part>
			<title>Motivation</title>
			<slide>
				<title>Writing schemas is hard &amp; tedious</title>
				<ul>
					<li>Schema languages can be hard to deal with because</li>
					<ul>
						<li>they are limited (DTD)</li>
						<li>they are complex (XML Schema)</li>
					</ul>
					<li>Schemas are not a good way to model data</li>
					<li>...for practical reasons:</li>
					<ul>
						<li>schemas can be confusing to look at (XML Schema)</li>
						<li>schemas are not intelligible to non-developers</li>
					</ul>
					<li>...for technical reasons:</li>
					<ul>
						<li>schemas are <em>representation-</em> and <em>technology-</em>specific (they only describe XML)</li>
						<li>XML is a tree-centric format, and so are its schema languages</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Writing 5cHEMa$ is cool &amp; g33ky!</title>
				<ul>
					<li>This obviously is an engineer's view</li>
					<li>Writing schemas should not be a goal, but a means to an end</li>
					<li>Data modeling should be done by data modelers</li>
					<li>A more conceptual view of the structures represented by the schemas would...</li>
					<ul>
						<li>...ease understanding for non-developers</li>
						<li>...enable focusing more on semantics</li>
						<li>...provide a more goal-oriented approach</li>
						<li>...allow for model integration</li>
						<li>...lead to more platform independence</li>
					</ul>
				</ul>
			</slide>
			<slide id="model-layers">
				<title>We Need a Conceptual Modeling Layer</title>
				<ul>
					<li>We need to be able to do modeling on a more abstract level</li>
					<li>In the database world, this level is called the <q>conceptual modeling layer</q></li>
					<table rules="all" class="stackTab">
						<tr>
							<th rowspan="2">Layer</th>
							<th colspan="2">Technology</th>
						</tr>
						<tr>
							<th>database world (SQL)</th>
							<th>XML related</th>
						</tr>
						<tr>
							<td>conceptual</td>
							<td>Entity Relationship Diagrams</td>
							<td><link href="model-to-markup">???</link></td>
						</tr>
						<tr>
							<td>logical</td>
							<td>DDL (<code>CREATE TABLE ...</code>)<br/>DML (<code>INSERT INTO ...</code>)</td>
							<td>schemas<br/>XQuery</td>
						</tr>
						<tr>
							<td>physical</td>
							<td>table space</td>
							<td>XML</td>
						</tr>
					</table>
					<li>Modeling Layers? Layering Models?</li>
				</ul>
			</slide>
		</part>
		<part>
			<title>Modeling Layers — Layering Models?</title>
			<part id="model">
				<title>Modeling</title>
				<slide id="model-definition">
					<title>What is a Model?</title>
					<ul>
						<li><q>A simplification</q></li>
						<ul>
							<li>only consider some relevant / interesting traits, neglecting details / unneeded properties</li>
							<li>the architectural model of the Parthenon in the <a href="http://www.thebritishmuseum.ac.uk/gr/debate.html">British Museum</a></li>
						</ul>
						<li><q>An abstraction</q></li>
						<ul>
							<li>a generalization / concept / idealization:</li>
							<li>determine / distinguish common / defining / characteristic attributes</li>
							<li>platonic ideas</li>
						</ul>
						<li><q>A template</q></li>
						<ul>
							<li>a mold / blueprint / reference example:</li>
							<li>prescribe relevant / defining attributes</li>
							<li>παραδειγμα stone in the ancient <a href="http://pr.caltech.edu/periodicals/eands/articles/LXVII1/samos.html">tunnel</a> of Eupalinos, Samos, Greece</li>
						</ul>
						<li>The former has no physical embodiment, whereas the latter have</li>
						<li>Usually there is a one-to-many relationship between <em>models</em> and <em>instances</em></li>
					</ul>
					<note>
						The cardinality of the relationship model-instance can be many-to-one! Think of a toy vessel — e.g., a <em>Titanic</em>! But wait... which one's now the <em>model</em>?
					</note>
				</slide>
				<slide id="model-fun">
					<title>What is a Model? (Natural Language)</title>
					<img src="http://www.markrobertwahlberg.com/mwck.jpg" style="height: 70%;" class="floatRight" />
					<ul>
						<li>Compare the use of <q>model</q> in (more) natural language:</li>
						<ul>
<!--
							<li><a href="http://en.wikipedia.org/wiki/George_Washington">George Washington</a> was a model of virtue</li>
-->
							<li><a href="http://sketchup.google.com/3dwarehouse/details?mid=e86d2c2bac08770486ed0f2c6244a512">Sather tower</a> is modeled after <a href="http://en.wikipedia.org/wiki/St_Mark%27s_Campanile">San Marco's <em>campanile</em></a></li>
							<li>The Ford <a href="http://www.modelt.org/">Model 'T'</a></li>
							<li>A fashion model <a href="http://www.markrobertwahlberg.com/mwck.jpg">→</a><sup>1</sup></li>
							<li><q><a href="http://www.brickshelf.com/gallery/stwr90/bridges/image0007.jpg.jpg">This</a> model is a model of the Golden Gate Bridge and it took me about 300over pieces...</q><sup>2</sup></li>
							<li><a href="http://www.anismodel.ch/f_anisherz.jpg">Anisguetzli</a>-<a href="http://www.anismodel.ch/modelbilder/zurich/6313g.jpg">Model</a> (used for a traditional Swiss aniseed cookie)</li>
							<li>Architectural models:<sup>3</sup></li>
							<ul class="dialog">
								<li><b>Derek Zoolander:</b> <q>What is this? A center for ants? How can we be expected to teach children to learn how to read... if they can't even fit inside the building?</q></li>
								<li><b>Mugatu:</b> <q>Derek, this is just a small...</q></li>
								<li><b>Derek Zoolander:</b> <q>I don't wanna hear your excuses! The building has to be at least... three times bigger than this!</q></li>
							</ul>
						</ul>
					</ul>
					<note>
						<ol>
							<li>From <a href="http://www.markrobertwahlberg.com/">http://www.markrobertwahlberg.com</a></li>
							<li>From <a href="http://www.mocpages.com/moc.php/6128">www.mocpages.com</a></li>
							<li>From <a href="http://www.imdb.com/title/tt0196229/quotes">http://www.imdb.com/</a>, with thanks to Dr.Sc. Erik Wilde.</li>
						</ol>
					</note>
				</slide>
				<slide id="model-modeling">
					<title>Modeling</title>
					<ul>
						<li>The process of identifying those <q>relevant</q> attributes and omitting the rest</li>
						<li>The formulation (or translation) thereof in a <em>way of description</em> commonly used or even standardized (mapping to a <em>meta-model</em>)</li>
						<li>This involves design decisions and trade-offs to be made</li> 
						<ul>
							<li>choosing the right granularity</li>
							<li>flexibility vs. stringency</li>
						</ul>
						<li>Modeling therefore always is...</li>
						<ul>
							<li>...connected to a certain <em>perspective</em></li>
							<li>...limited to a certain <em>scope</em></li>
							<li>...having a <em>main focus</em></li>
						</ul>
					</ul>
					<note>In a certain field / realm / <em>universe of discourse</em> there usually is some agreement on how modeling has to be done. This is an essential prerequisite for models to be used as a subject of discussion / negotiation / evaluation. This agreement can have been achieved implicitly or by standardization. In the <link href="model-fun">example</link> above, Derek Zoolander does not know the conventions implicitly being agreed on when dealing with architectural models.</note>
				</slide>
				<slide id="model-reasons">
					<title>Why modeling?</title>
					<ul>
						<li>Get a bigger picture:</li>
						<ul>
							<li>focus on relevant features</li>
							<li>deal with data's meaning instead of representation</li>
							<li>facilitate interaction / integration</li>
						</ul>
						<li>Description: allows for</li>
						<ul>
							<li>analysis &amp; improvement</li>
							<li>documentation</li>
							<li>verification</li>
						</ul>
						<li>Prescription: allows for</li>
						<ul>
							<li><a href="http://www.henrywagner.org/pictures/ff/Thanksgiving2005/IMG_1752.php">simulation</a></li>
							<li>prognosis</li>
							<li>making assumptions (e.g. when creating software processing the XML described by a schema)</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="layer">
				<title>Layering</title>
				<slide id="layer-cs">
					<title>Layering in Computer Science</title>
					<ol>
						<li>Encapsulating / hiding details / internals</li>
						<li>Enabling working on a simpler / more goal-oriented level</li>
						<li>Reusing frequent structures / procedures (<q>patterns</q><sup>1</sup>)</li>
						<li>Gaining independence from specific technologies / media</li>
					</ol>
					<note>
						<ol>
							<li><q>Patterns are models that are sufficiently general, adaptable, and worthy of imitation that we can reuse them.</q> (From: Glushko, Robert J. and McGrath, Tim: <em>Document Engineering</em>, p. 90). Identifying such patterns is a modeling task!</li>
						</ol>
					</note>
				</slide>
				<slide id="layer-communication">
					<title>Layering in Computer Science: Protocol stacks</title>
					<ol class="floatRight">
						<li>Encapsulation</li>
						<li>Goal-orientation</li>
						<li>Pattern reuse</li>
						<li>Independence</li>
					</ol>
					<ul>
						<li>The TCP/IP protocol stack</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>Google, a wiki, your blogging tool</td>
								<td rowspan="2"><em>Application</em></td>
								<td>(1)</td>
							</tr>
							<tr>
								<td>HTTP, FTP</td>
								<td>(3)</td>
							</tr>
							<tr>
								<td>TCP, UDP</td>
								<td><em>Transport</em></td>
								<td>(2)</td>
							</tr>
							<tr>
								<td>IP</td>
								<td><em>Network</em></td>
								<td>(4)</td>
							</tr>
							<tr>
								<td>Ethernet, 802.11</td>
								<td><em>MAC (Medium Access Control)</em></td>
								<td></td>
							</tr>
							<tr>
								<td>NRZ, DSSS, 16QAM</td>
								<td rowspan="2"><em>Physical</em></td>
								<td></td>
							</tr>
							<tr>
								<td>Copper wire, fiber, RF</td>
								<td></td>
							</tr>
						</table>
						<li>Metaphor: Andrew S. Tanenbaum's interpreter</li>
					</ul>
				</slide>
				<slide id="layer-compiler">
					<title>Layering in Computer Science: Compiler</title>
					<ol class="floatRight">
						<li>Encapsulation</li>
						<li>Goal-orientation</li>
						<li>Pattern reuse</li>
						<li>Independence</li>
					</ol>
					<ul>
						<li>Compilers, Virtual Machines, VHDL</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>UML</td>
								<td>(2)</td>
							</tr>
							<tr>
								<td><span style="font-variant: small-caps">Java</span></td>
								<td>(4)</td>
							</tr>
							<tr>
								<td>Virtual machine</td>
								<td></td>
							</tr>
							<tr>
								<td>C</td>
								<td>(1)</td>
							</tr>
							<tr>
								<td>Assembler Language</td>
								<td></td>
							</tr>
							<tr>
								<td>Machine Code</td>
								<td></td>
							</tr>
							<tr>
								<td>VHDL</td>
								<td></td>
							</tr>
							<tr>
								<td>Logic Gates</td>
								<td>(3)</td>
							</tr>
						</table>
						<li>Metaphor: Big enterprise with strong vertical division of work</li>
					</ul>
				</slide>
				<slide id="layer-physiology">
					<title>Layering in Human Physiology</title>
					<ul>
						<li>Perception:</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>My Bicycle</td>
								<td>Memory, social implications...</td>
							</tr>
							<tr>
								<td>A Bicycle</td>
								<td>Prior interpretative knowledge</td>
							</tr>
							<tr>
								<td>Circles, lines and a diamond shape, colored</td>
								<td>Form vision</td>
							</tr>
							<tr>
								<td>Electrical pulses</td>
								<td>Neural transmission</td>
							</tr>
							<tr>
								<td>A concentration of Rhodopsin, Opsin, cGMP...</td>
								<td>Retina: Rods &amp; Cones</td>
							</tr>
							<tr>
								<td>A bunch of sunrays, reflected</td>
								<td>Pupil, eye lens</td>
							</tr>
						</table>
						<li>Motion:</li>
						<table rules="rows" class="stackTab">
							<tr>
								<td>Shake hands</td>
								<td>Conscious action</td>
							</tr>
							<tr>
								<td colspan="2" align="center">...all the way down to...</td>
							</tr>
							<tr>
								<td>Converting some ATP</td>
								<td>Mitochondria</td>
							</tr>
						</table>
					</ul>
				</slide>
			</part>
			<slide id="model-layer">
				<title>The Combination: Model Layers!</title>
				<ul>
					<li>Modeling layers: Models of different levels of abstraction and / or granularity, stacked onto each other as layers</li>
					<li>There are different classifications of such layers of modeling</li>
					<li>There is a diligent classification used in the context of data modeling: P.P.S. Chen's <q>Multiple Views of Data</q>:<sup>1</sup></li>
					<ol>
						<li>Information concerning entities and relationships in our minds</li>
						<li>Information structure — organization in which entities and relationships are represented by data.</li>
						<li>Access-path-independent data structure — the data structures which are not involved with search schemes, indexing schemes, etc.</li>
						<li>Access-path-dependent data structure</li> 
					</ol>
					<li>Most often the coarser classification <link href="model-layers">instance / logical / conceptual</link> layer is used</li>
				</ul>
				<note>
					<ol>
						<li>Chen, Peter Pin-Shan: <em>The Entity-Relationship Model — Toward a Unified View of Data</em>, Cambridge MA, 1976.</li>
					</ol> 
				</note>
			</slide>
		</part>
		<part id="data-modeling">
			<title>Data Modeling</title>
			<slide id="data-modeler">
				<title>Who's a Data Modeler?</title>
				<ul>
					<li><em>You are Data Modelers!</em></li>
					<li>You did (implicitly, and perhaps unconsciously) Data Modeling while creating the résumé DTD</li>
					<li>Most likely you modeled your data already when creating the XML instance</li>
					<li>In either case you most likely were thinking in some semantic structures on a more <em>conceptual</em> level than the final schema or instance resides</li>
					<li>Did anybody...</li>
					<ul>
						<li>...draw some trees?</li>
						<li>...sketch some boxes?</li>
						<li>...write down some lists?</li>
					</ul>
				</ul>
			</slide>
			<slide id="quality-criteria">
				<title>Quality Criteria</title>
				<ul>
					<li>See the quality criteria from <link href="bestpractices">The Good, the Bad, and the Ugly</link></li>
					<ul>
						<li>avoid <link href="redundant-data">redundancy</link>, especially within <link href="schema-redundancy">schemas</link></li>
						<li>enforce <link href="reuse">reuse</link></li>
						<li>be <link href="element-vs-attribute">consistent</link></li>
						<li>choose a reasonable level of <link href="granularity">granularity</link></li>
						<li>think in terms of the logical structure (e.g., the <link href="infoset">Infoset</link>) rather than in terms of the physical representation (e.g., the XML how it is stored in a file)</li> 
					</ul>
					<li>What is <q>good</q> data modeling?</li>
					<li>➟ most of the above criteria are applicable to data modeling as well</li>
				</ul>
			</slide>
			<part>
				<title>An example case: Harry again</title>
				<slide id="harry-returns">
					<title>Harry returns</title>
					<ul>
						<li>Make up a data model for general résumés</li>
						<ol>
							<li>capture <em>logical structure</em> rather than <em>representation</em> of the data</li>
							<li>do not include information encoded in the <em>document's structure</em> into the <em>instance's data</em> (rely on that certain amount of self-description of XML)</li>
						</ol>
						<li>In <link href="xml-views">The Good, the Bad, and the Ugly</link>, we have been told:
							<q>think about working with a tree rather than working with a text file</q> — so let's draw a tree!
						</li>
					</ul>
					<note>
						<ol>
							<li>The fact that the person's name and contact information usually is given in a <em>head</em> section of a résumé document does not necessarily mean that such a head section is a relevant structural element: It's just a representational convention, and therefore should not be part of the data model. When creating a <em>view</em> of the data, we can utilize our knowledge of appropriate representational conventions by rendering personal information in a head section.</li>
							<li>If our vocabulary contains dedicated elements for <elem>education</elem> and <elem>experience</elem> it is not necessary to include attributes or elements specifying section titles like <q>education</q> or <q>experience</q>: This information can be retrieved from the structure and again added to a specific view when being generated.</li>
						</ol>
					</note>
				</slide>
				<slide id="harry-tree">
					<title>Retrieving the résumé's structure</title>
					<div class="panel">
						<object data="img/res01c.svg" type="image/svg+xml" width="900" height="450">
							<param name="src" value="img/res01c.svg" />
							<p>You need to have an SVG viewer installed in order to view this graphic!</p>
						</object>
					</div>
					<ul>
						<li>We omit <elem>phone</elem> and <elem>email</elem> for the sake of simplicity</li>
					</ul>
				</slide>
				<slide id="good-dtd-1">
					<title>A well-designed DTD (1)</title>
					<listing src="resume-model-01.dtd" line="1-9" />
					<ul>
						<li>The order constraint in <elem>résumé</elem> probably is introduced by the schema language's limitations</li>
						<ul>
							<li>SGML: <q>&amp;</q> connector (not part of XML)</li>
							<li>XML Schema: <xsde>all</xsde> model group</li>
						</ul>
						<li><elem>date</elem> has been made more flexible (by denoting <elem>day</elem> to be optional) in order to be useable more generally</li>
						<li><code>proficiency</code> readily can be modeled as an attribute; its value space is a good example for an enumeration</li>
					</ul>
				</slide>
				<slide id="good-dtd-2">
					<title>A well-designed DTD (2)</title>
					<listing src="resume-model-01.dtd" line="12-16" />
					<ul>
						<li><elem>address</elem> and <elem>name</elem> are good examples for reusable elements</li>
						<li><elem>name</elem> is a semantically sensible container: <elem>first</elem> etc. are <em>parts-of</em> it</li>
						<li>the nesting <elem>startDate</elem> (or <elem>endDate</elem>, respectively) » <elem>date</elem> semantically is a less expressive relation; it merely is inserted in order to</li>
						<ul>
							<li>reuse <elem>date</elem></li>
							<li>insert two <elem>date</elem> elements,</li>
							<li>giving the two some reasonable (self-descriptive) names</li>
						</ul>
						<li>this clearly would be a good case for using named types in XML Schema</li>
					</ul>
				</slide>
<!-- 
				<slide id="good-dtd-3">
					<title>A well-designed DTD (3)</title>
					<listing src="resume-model-01.dtd" line="5-6" />
					<listing src="resume-model-01.dtd" line="18-22" />
					<ul>
						<li><code>institution</code> and <code>company</code> (or even <code>education</code> and <code>experience</code>) are candidates for parameter entities (DTD) or global types / reusable groups / inheritance<sup>1</sup> (XML Schema)</li>
					</ul>
					<note>
						<ol>
							<li>What would be the base type? What would be the base type's semantics?</li>
						</ol>
					</note>
				</slide>
 -->
 				<slide id="good-instance">
					<title>A good instance from a well-designed DTD</title>
					<listing src="resume-felix-01.xml" line="10-13" />
					<listing src="resume-felix-01.xml" line="24-27" />
					<listing src="resume-felix-01.xml" line="96-100" />
					<listing src="resume-felix-01.xml" line="132-134" />
				</slide>
				<slide>
					<title>A look at the <em>essay</em> section</title>
					<listing src="resume-felix-01.xml" line="121-130" />
					<ul>
						<li>There's still redundant data!</li>
						<li>Plain, unstructured text; hard for machines to interpret</li>
						<li>Use NLP... — or improve your data model!</li>
					</ul>
				</slide>
				<slide id="good-dtd-review">
					<title>Critical Review: A well-designed DTD?</title>
					<ul>
						<li>The DTD is not that well-designed:</li>
						<ol>
							<li>regarding the markup: container elements around <elem>education</elem> and <elem>experience</elem> items would be nice</li>
							<li>from a data perspective: resolve redundancies</li>
							<li>conceptually: allow for representation of these semantic relations</li>
						</ol>
						<li>To achieve this, we need:</li>
						<ul>
							<li>A better modeling formalism</li>
							<li>More precise quality criteria</li>
						</ul>
						<li>In the world of relational databases, both of them exist</li>
					</ul>
				</slide>
			</part>
			<part id="relational-modeling">
				<title>Excursus: Data Modeling in the World of Relational Databases</title>
				<slide id="relational-nf">
					<title>Quality Criteria: Normal Forms</title>
					<ul>
						<li>There are well-defined quality criteria of increasing strictness, called <a href="http://dret.net/glossary/normalform">Normal Forms</a><sup>1</sup></li>
						<li>An informal example:</li>
						<table rules="all" class="tab">
							<tr>
								<th class="tabHi">ID</th>
								<th>Name</th>
								<th>Study</th>
								<th>Department</th>
							</tr>
							<tr>
								<td class="tabHi">24536133</td>
								<td>Bob</td>
								<td>Computer Science</td>
								<td>College of Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">34125004</td>
								<td>Alice</td>
								<td>Document Engineering</td>
								<td>School of Information</td>
							</tr>
							<tr>
								<td class="tabHi">11042019</td>
								<td>Zlatan</td>
								<td>Computer Science</td>
								<td>College of Engineering</td>
							</tr>
						</table>
						<li>Must be resolved to:</li>
						<table rules="all" class="tab" style="float: right;">
							<tr>
								<th class="tabHi">Study</th>
								<th>Department</th>
							</tr>
							<tr>
								<td class="tabHi">Computer Science</td>
								<td>College of Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">Document Engineering</td>
								<td>School of Information</td>
							</tr>
						</table>
						<table rules="all" class="tab">
							<tr>
								<th class="tabHi">ID</th>
								<th>Name</th>
								<th>Study</th>
							</tr>
							<tr>
								<td class="tabHi">24536133</td>
								<td>Bob</td>
								<td>Computer Science</td>
							</tr>
							<tr>
								<td class="tabHi">34125004</td>
								<td>Alice</td>
								<td>Document Engineering</td>
							</tr>
							<tr>
								<td class="tabHi">11042019</td>
								<td>Zlatan</td>
								<td>Computer Science</td>
							</tr>
						</table>
					</ul>
					<note>
						<ol>
							<li>Yet, the most strict normal forms (4NF, 5NF) are hardly ever used in practice for the reasons mentioned <link href="redundant-data">earlier</link></li>
						</ol>
					</note>
				</slide>
				<slide id="relational-er">
					<title>Conceptual Modeling Formalism: Entity-Relationship Diagrams</title>
					<ul>
						<li>There is a well-established (graphical!) formalism for conceptual modeling</li>
						<li>Using the formalism in the correct way leads to data models complying with the quality criteria</li>
						<li>An example:</li>
					</ul>
					<div class="panel">
						<img src="ER.png" />
					</div>
					<ul>
						<li>Today, there exist several ER-dialects; <a href="http://www.uml.org/">UML</a> is a superset</li>
					</ul>
				</slide>
			</part>
			<part id="conceptual-modeling">
				<title>Conceptual Modeling for XML Data</title>
				<slide id="conceptual-modeling-today">
					<title>Is there anything similar for XML?</title>
					<ul>
						<li>There is no established formalism; nor is there any formalism as suitable for XML as ER is for relational databases</li>
						<li>There are scientific proposals: Several extensions of ER (XER, ERX, EER), formal grammars (XGrammar)</li>
						<ul>
							<li>some of them have limited scope, some are impractical for real-world deployment, some of them have no graphical equivalent at all</li>
						</ul>
						<li>Extended/restricted versions of <a href="http://www.uml.org/">UML</a></li>
						<ul>
<!--						
							<li>UML cannot model everything XML could express (e.g. mixed content)</li>
							<li>XML cannot express everything UML can model (e.g. ?)</li>
-->						
							<li>most often, UML is simply used as a drawing tool for schemas: Schemas are a bad way for conceptual modeling, UML is a bad drawing tool</li>
						</ul>
						<li>Textual descriptions can be used</li>
						<ul>
							<li>possibly inaccurate, translation to schemas is error-prone and not formally verifiable</li>
						</ul>
						<li>In strongly data-oriented contexts, spreadsheets can be used (<a href="http://docs.oasis-open.org/ubl/cd-UBL-1.0/">UBL</a>)</li>
						<ul>
							<li>this restricts the structural expressiveness in fundamental ways</li>
						</ul>
					</ul>
				</slide>
				<slide id="conceptual-modeling-probelms">
					<title>Why is it so hard to create a suitable formalism?</title>
					<ul>
						<li>XML data can be much more complex than relational data:</li>
						<ul>
							<li>hierarchical structures (maybe even recursive)</li>
							<li>mixed content</li>
							<li>alternatives (<q><code>|</code></q> connector, <xsde>choice</xsde> model group)</li>
							<li>order constraints</li>
							<li>ID/IDREF constructs</li>
							<li>faceted/enumerated content models</li>
						</ul>
						<li>There are no clear quality criteria: Many things are a question of <em>style</em> or <em>taste</em></li>
						<ul>
							<li>schema languages allow for different paradigms to be used</li>
							<li>functionally equivalent (and semantically similar) results can be achieved by quite different means: e.g. <em>choice model group</em> vs. <em>substitution group</em></li>
							<li>the quality of the final schema or XML may considerably depend on things beyond the scope of a conceptual model</li>
						</ul>
					</ul>
				</slide>
				<slide id="conceptual-modeling-approach">
					<title>An informal formalism</title>
					<ul>
						<li>As there is no established notation, <q><link href="model-to-markup">informal models may use any notation</link></q></li>
						<li>Well then, let's draw some boxes!</li>
						<li>But even so, let's try to do it in a systematic way:</li>
					</ul>
					<table rules="groups" border="1" class="tab">
						<thead>
							<tr>
								<th></th>
								<th>Determine...</th>
								<th>Phase</th>
								<th>Question</th>
								<th>Example</th>
								<th>Action</th>
							</tr>
						</thead>
						<tbody>
							<tr>
								<td>1.</td>
								<td>Entities</td>
								<td>Inventory</td>
								<td rowspan="2">What's there?</td>
								<td><elem>person</elem>, <elem>company</elem></td>
								<td>Sketch boxes</td>
							</tr>
							<tr>
								<td>2.</td>
								<td>Reusable Objects</td>
								<td>Analysis</td>
								<td><elem>address</elem>, <elem>date</elem></td>
								<td>Perhaps include some model libraries (<a href="http://docs.oasis-open.org/ubl/cd-UBL-1.0/#SCHEMAS">UBL</a>)</td>
							</tr>
							<tr>
								<td>3.</td>
								<td>Reusable Tags</td>
								<td>Markup design</td>
								<td>What do we need?</td>
								<td>lists, hyperlinks, headings</td>
								<td>Perhaps include some schemas (<a href="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">XHTML</a>)</td>
							</tr>
							<tr>
								<td>4.</td>
								<td>Relations</td>
								<td>Assembly</td>
								<td>What's the connection?</td>
								<td>has-a, contains, references</td>
								<td>Draw arcs and arrows</td>
							</tr>
						</tbody>
					</table>
				</slide>
				<slide id="conceptual-modeling-example">
					<title>The résumé structure, informally formalized</title>
					<div class="panel">
						<img  style="height : 70% ; margin : 2% ; " src="informal.png" />
					</div>
					<note>
						<ul>
							<li>This clearly is not a tree anymore</li>
							<li>There's no specific schema language indicated</li>
							<li>Still missing are:</li>
							<ul>
								<li>constrained / faceted value spaces</li>
								<li>order constraints</li>
								<li>type inheritance (XML Schema)</li>
							</ul>
						</ul>
					</note>
				</slide>
				<slide id="better-dtd">
					<title>An even better DTD</title>
					<listing src="resume-model-02.dtd" line="5-6" />
					<listing src="resume-model-02.dtd" line="42-43" />
					<listing src="resume-model-02.dtd" line="22-23" />
					<listing src="resume-model-02.dtd" line="27-31" />
					<listing src="resume-model-02.dtd" line="47-53" />
				</slide>
				<slide id="better-instance">
					<title>An even better instance</title>
					<listing src="resume-felix-02.xml" line="69-75" />
					<ul>
						<li>The essay section: real mixed-content markup!</li>
					</ul>
					<listing src="resume-felix-02.xml" line="134-142" />
				</slide>
				<slide id="generating-views">
					<title>Generating Views</title>
					<ul>
						<li>Utilize the (non-tree) ID/IDREF-relations in order to retrieve the data where needed:</li>
					</ul>
					<listing src="narrative.xsl" line="183-191" />
					<ul>
						<li>From a well-designed data structure with rich semantic connectivity, multiple <em>views</em> can easily be derived</li>
						<li>As an example, two different views have been generated using two XSLT 1.0 stylesheets</li>
						<table class="tab" rules="all">
							<tr>
								<th>View</th>
								<th>XSLT</th>
								<th>HTML</th>
							</tr>
							<tr>
								<th>Tabular</th>
								<td align="center"><a href="src/tabular.xsl">tabular.xsl</a></td>
								<td align="center"><a href="src/tabular.html">tabular.html</a></td>
							</tr>
							<tr>
								<th>Textual</th>
								<td align="center"><a href="src/narrative.xsl">narrative.xsl</a></td>
								<td align="center"><a href="src/narrative.html">narrative.html</a></td>
							</tr>
						</table>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xmlmx-conclusions">
			<title>Conclusions</title>
			<slide>
				<title>XML and Modeling</title>
				<ul>
					<li>Conceptual modeling is highly desirable</li>
					<li>When used in a <link href="layer-communication">communication-type</link> scenario, conceptual modeling inherently has to rely upon an agreement on established formalisms</li>
					<li>When used in a <link href="layer-compiler">compiler-type</link> scenario, conceptual modeling requires the availability of appropriate tools</li>
					<li>There is no formalism really suitable for XML-centric data; there are no sophisticated tools<sup>1</sup></li>
					<li>➟ There is a gap to be bridged!</li>
				</ul>
				<note>
					<ol>
						<li>Tools are evil — at least potentially: they may introduce toolchain-dependencies, they may generate ugly markup, and they may deprive us of our most beloved hobby: writing schemas.</li>
					</ol>
				</note>
			</slide>
		</part>
	</presentation>
	<presentation id="schemalanguages" cover="slidycover">
		<title short="Schema Languages">Alternative Schema Languages — Schematron</title>
		<date short="2006-10-17">Tuesday, October 17, 2006</date>
		<toc id="reading">Chapter 4.5 (159-163)</toc>
		<toc id="resources"><a href="http://www.thaiopensource.com/relaxng/design.html">The Design of RELAX NG</a>; <a href="http://www.schematron.com/" title="Schematron Home Page">Schematron</a></toc>
		<toc id="abstract">While XML Schema is the most popular schema language in use today and for the foreseeable future, it is only one representative from a class of languages which are all designed for the purpose of testing whether some XML document satisfies a set of constraints. This test could of course also be conducted programmatically, but this is not portable and not easily maintainable. Schema languages thus often use a declarative approach to specifying how to conduct validation. A very simple yet very powerful language for this is <em>Schematron</em>, which uses the expressive power of XPath for testing whether a document satisfies a set of conditions. Schematron is <em>rule-based</em> in contrast to the more traditional <em>grammar-based</em> schema languages and complements these very well.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XML Schema Languages</title>
			<ul>
				<li>XML schema languages define constraints for XML documents</li>
				<ul>
					<li>defining constraints declaratively is better than writing program code</li>
					<li>programming should be deferred as long as possible</li>
				</ul>
				<li>XML schema languages validate XML documents</li>
				<ul>
					<li>DTDs check XML documents against the grammar rules</li>
					<li>XML Schemas support additional datatyping for validating contents</li>
				</ul>
				<li>Applications often have many more constraints</li>
				<ul>
					<li>global constraints on the characters used in the document</li>
					<li><em>co-constraints</em> which relate content to content</li>
					<li>comparisons with external data (such as controlled lists)</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Schema-Validation and Applications</title>
			<img src="schema-valid-documents.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<slide>
			<title>Validation Pipelines</title>
			<ul>
				<li>Validation is a modular task with various facets</li>
				<ul>
					<li>modularization is a popular and useful principle in computer science</li>
					<li>XML Schema is the attempt to build the <em>one and only schema language</em></li>
					<li>more modular approaches might lead to more flexible validation</li>
				</ul>
				<li>Validation pipelines are useful in various scenarios</li>
				<ul>
					<li>perform validation based on a sequence of basic validation tasks</li>
					<li>make validation more configurable (partial validation)</li>
					<li>make validation more flexible (different validation stages)</li>
				</ul>
				<li>Validation pipelines can be easily implemented</li>
				<ul>
					<li>programming languages support passing DOM trees as parameters</li>
					<li><em>XML pipeline languages</em> can be used to implement pipelines declaratively</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Validation Pipeline Example</title>
			<img src="validation-pipeline.png" style="width : 90% ; margin : 4% ; "/>
		</slide>
		<part id="relax-ng">
			<title>RELAX NG</title>
			<slide>
				<title>Design by Committee</title>
				<ul>
					<li>XML Schema was a political decision</li>
					<ul>
						<li>several schema languages were competing to replace DTDs</li>
						<li>DCD, DDML, SOX, XML Data, and XDR were inputs to XML Schema</li>
						<li>XML Schema became the first unreadable W3C specification</li>
						<li>implementing XML Schema correctly is hard (large number of specialized rules)</li>
					</ul>
					<li>Researchers were looking for a more elegant solution</li>
					<ul>
						<li>the underlying formalism should be well-defined and well-studied</li>
						<li>the schema language should be easy to learn and use</li>
						<li>lessons learned from DTDs should be included</li>
					</ul>
					<li>RELAX NG is the merger of two similar approaches</li>
					<ul>
						<li><em>Tree Regular Expressions (TREX)</em></li>
						<li><em>Regular Language description for XML (RELAX)</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>RELAX NG +/-</title>
				<ul>
					<li>RELAX NG and XML Schema are direct competitors</li>
					<li>Advantages of RELAX NG</li>
					<ul>
						<li><link href="relaxng-compact"/></li>
						<li>the document element is well-defined</li>
						<li>SGML's <q><code>&amp;</code></q> is supported (<xsde>all</xsde> is extremely limited)</li>
						<li>non-deterministic content models</li>
					</ul>
					<li>Disadvantages of RELAX NG</li>
					<ul>
						<li>no datatype support (datatype libraries can be included)</li>
						<li>no modeling facilities in the spirit of XML Schema's type derivation</li>
						<li>less popular than XML Schema</li>
						<li>no support for XML Schema's numeric occurrence constraints (<xsd>minOccurs</xsd>/<xsd>maxOccurs</xsd>)</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Principles</title>
				<slide>
					<title>Validation</title>
					<ul>
						<li>Validation should not change the document</li>
						<ul>
							<li>there are no default values</li>
						</ul>
						<li>Only schema↔instance tests are supported</li>
						<ul>
							<li>there is no type hierarchy as in XML Schema (schema↔schema)</li>
							<li>there are no identity constraints (instance↔instance)</li>
						</ul>
						<li>Grammars should not be restricted</li>
						<ul>
							<li>DTDs and XML Schema disallow non-determinism</li>
							<li>RELAX NG allows non-deterministic content models</li>
							<li><code>chess = white, (black, white)*, black?</code></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Grammars</title>
					<ul>
						<li>RELAX NG grammars have a start symbol</li>
						<ul>
							<li>DTDs and XML Schema do not have start symbols</li>
						</ul>
						<li>Attributes are defined as part of the <q>content model</q></li>
						<ul>
							<li>a more homogeneous view of the XML document tree</li>
							<li>this allows alternatives of elements and attributes</li>
						</ul>
						<li>Grammars are a set of named rules</li>
						<ul>
							<li>rules define how an element is composed</li>
							<li>local definitions (nested specifications of content models) are possible</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Example</title>
				<slide>
					<title>DTD and XML Schema</title>
					<listing src="document.dtd"/>
					<listing src="document.xsd"/>
				</slide>
				<slide>
					<title>RELAX NG</title>
					<listing src="document.rng"/>
				</slide>
				<slide id="relaxng-compact">
					<title>RELAX NG Compact Syntax</title>
					<listing src="document.rnc"/>
				</slide>
			</part>
		</part>
		<part id="dsdl">
			<title short="DSDL">Document Schema Definition Languages (DSDL)</title>
			<slide>
				<title>Modular Validation</title>
				<ul>
					<li>RELAX NG gained popularity as an XML Schema alternative</li>
					<ul>
						<li>RELAX NG left useful functionality out of the language</li>
						<li><link href="schematron"/> appeared as a useful addition to schema languages</li>
					</ul>
					<li>Based on the idea of modular validation, DSDL was announced</li>
					<ul>
						<li>DSDL should define a set of complementary schema languages</li>
						<li>DSDL should also define a framework for applying these languages</li>
					</ul>
					<li>Development and support have been slow and disappointing</li>
					<ul>
						<li>RELAX NG and Schematron are successful</li>
						<li>all other parts of DSDL are undefined or underspecified</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>DSDL Master Plan</title>
				<ul>
					<li>DSDL is described as having the following parts</li>
					<ol>
						<li>DSDL Overview</li>
						<li><link href="relax-ng"/></li>
						<li><link href="schematron"/></li>
						<li><em>Namespace-based Validation Dispatching Language (NVDL)</em></li>
						<li><em>Data Type Library Language (DTLL)</em></li>
						<li>Path-based integrity constraints</li>
						<li><em>Character Repertoire Validation Language (CRVL)</em></li>
						<li><em>Document Schema Renaming Language (DSRL)</em></li>
					</ol>
					<li>It is unlikely that DSDL will succeed</li>
					<ul>
						<li>years of identical presentations and stalled developments</li>
						<li>ISO is not a good place for fast-paced technologies</li>
					</ul>
					<li>DSDL should be regarded as an <em>inspiration</em>, not as a <em>solution</em></li>
				</ul>
			</slide>
		</part>
		<part id="schematron">
			<title>Schematron</title>
			<slide>
				<title>XPath Again</title>
				<ul>
					<li>Schematron popularized XPath-based testing of XML documents</li>
					<ul>
						<li>the language is far from being well-designed</li>
						<li>it can be easily used to write down a number of XPath-based constraints</li>
						<li>it can be used as an inspiration to do a better job of XPath-based testing</li>
					</ul>
					<li>XPath makes it very easy to select parts of XML trees</li>
					<ul>
						<li>many XSLT programs contain some <q>validation</q> before processing</li>
						<li>validation and processing should be kept separate</li>
						<li>if validation is kept separate, there may be easier ways than XSLT</li>
					</ul>
					<li>Schematron has been built for human-oriented reporting</li>
					<ul>
						<li>Schematron outputs are text messages that humans should read</li>
						<li>machine-oriented validation requires different features</li>
						<li>integrating Schematron into machine-oriented pipelines requires some effort</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Basics</title>
				<ul>
					<li>Schematron schemas can be regarded as scripts for XPath testing</li>
					<ul>
						<li><em>patterns</em> group together a set of task-oriented tests</li>
						<li><em>rules</em> define tests which have to be applied in a certain context</li>
						<li><em>assertions</em> are XPaths which are evaluated in a given context</li>
					</ul>
					<li>Schematron schemas in most cases do not cover the whole XML tree</li>
					<ul>
						<li>for the rules to work, the structural integrity should be validated first</li>
						<li>if the structure of the tree is valid, rules specify additional constraints</li>
						<li>Schematron is a <em>complement</em> to grammars, not a <em>replacement</em></li>
					</ul>
				</ul>
				<listing src="address.sch" line="2-9"/>
			</slide>
			<part>
				<title>Implementation</title>
				<slide>
					<title>Performing Validation</title>
					<ul>
						<li>Schema languages are declarative inputs for validation</li>
						<ul>
							<li>schema languages are not executable programming languages</li>
							<li>to perform validation, some software component must process documents and schemas</li>
						</ul>
						<li>Schema languages require supporting software</li>
						<ul>
							<li>DTDs are part of XML; validating XML processors must perform DTD validation</li>
							<li>XML Schema is a separate specification, an XML Schema processor is required</li>
						</ul>
						<li>Schematron is built around XPaths</li>
						<ul>
							<li>any technology supporting XPath evaluation would be a good foundation</li>
							<li>XSLT is a technology supporting XPath evaluation</li>
							<li>XSLT's program flow control is good enough to support Schematron</li>
							<li>XSLT processors are available for a large number of platforms</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XSLT-Generated XSLT</title>
					<ul>
						<li>XSLT uses XML as its syntax</li>
						<ul>
							<li>this is inconvenient because XSLT programs are very verbose</li>
							<li>processing XSLT with XSLT is supported very well</li>
							<li>for power users, the benefits outweigh the discomforts</li>
						</ul>
						<li>How is it possible to generate XSLT from XSLT?</li>
						<ul>
							<li>it is impossible to use literal result elements (they would be executed)</li>
							<li>it would be against XSLT's idea to write the resulting XSLT as text</li>
							<li>there must be a distinction between <q>executable</q> and <q>output</q> XSLT elements</li>
						</ul>
					</ul>
					<pre>&lt;xsl:template match="rule">
	<span style="color : red ; ">&lt;xsl:template match="{@context}"></span>
		&lt;xsl:apply-templates select="assert"/>
	<span style="color : red ; ">&lt;/xsl:template></span>
&lt;/xsl:template></pre>
				</slide>
				<slide>
					<title>XSLT-Based Schematron</title>
					<img src="schematron-xslt.png" style="width : 90% ; margin : 4% ; "/>
				</slide>
				<slide>
					<title>Compiling Assertions</title>
					<listing src="skeleton1-5.xsl" line="60-64"/>
					<listing src="skeleton1-5.xsl" line="67-67"/>
					<listing src="skeleton1-5.xsl" line="146-179"/>
				</slide>
				<slide>
					<title>Compiled Example</title>
					<listing src="address.xsl"/>
				</slide>
			</part>
			<part id="schematron-patterns">
				<title>Patterns</title>
				<slide>
					<title>Grouping Tests</title>
					<ul>
						<li>Patterns are containers for a set of <link href="schematron-rules"/></li>
						<ul>
							<li>patterns are used for representing goal-oriented parts of the validation</li>
							<li>achieving one goal may require checking within various contexts</li>
						</ul>
						<li>Patterns are described by a title and additional text</li>
						<ul>
							<li>Schematron is geared towards human users</li>
							<li>title and text are documentation only, they are never used for validation</li>
						</ul>
						<li>Patterns can be grouped by <em>phases</em> for different validation tasks</li>
						<ul>
							<li>patterns group a set of rules specific for one validation goal</li>
							<li>depending on the application, different validation phases may require different sets of patterns</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="schematron-rules">
				<title>Rules</title>
				<slide>
					<title>Setting the Context</title>
					<ul>
						<li>Setting the context is essential for XPath expressions</li>
						<ul>
							<li>within <link href="schematron-patterns"/>, rules group context-specific <link href="schematron-assertions"/></li>
							<li>assertion XPaths are evaluated relative to a rule's context</li>
						</ul>
						<li><em>Abstract rules</em> make it possible to reuse assertions</li>
						<ul>
							<li>abstract rules are not evaluated (they do not have a context)</li>
							<li>other rules may import assertions by <em>extending</em> an abstract rule</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="schematron-assertions">
				<title>Assertions</title>
				<slide id="schematron-assert">
					<title>Assertions with <code>assert</code></title>
					<ul>
						<li><code>assert</code> is used to specify assertions</li>
						<ul>
							<li>if the XPath evaluates to <code>false</code>, the assertion's content is output</li>
							<li>assertions are always evaluated as boolean (type casting will be applied)</li>
						</ul>
						<li>Assertion XPaths are evaluated relative to the containing rule's context</li>
						<ul>
							<li>moving an assertion from one rule to another will change its meaning</li>
						</ul>
						<li>XPath is not good for expressing grammar rules</li>
						<ul>
							<li>grammar checking should be left to grammar-oriented languages</li>
						</ul>
					</ul>
					<pre>&lt;!ELEMENT ENTRY (NAME, ADDRESS, PHONENUM+, EMAIL) ></pre>
					<pre>( count(NAME) = 1 and count(ADDRESS) = 1 and count(EMAIL) = 1 ) and ( NAME[following-sibling::ADDRESS] and ADDRESS[following-sibling::PHONENUM] and PHONENUM[following-sibling::EMAIL] ) and ( count(NAME|ADDRESS|PHONENUM|EMAIL) = count(*) )</pre>
				</slide>
				<slide id="schematron-report">
					<title>Assertions with <code>report</code></title>
					<ul>
						<li><code>report</code> is used to generate reports</li>
						<ul>
							<li>if the XPath evaluates to <code>true</code>, then the assertion's content is output</li>
							<li>assertions are always evaluated as boolean (type casting will be applied)</li>
						</ul>
						<li>Logically, <code>assert</code> and <code>report</code> are inverse</li>
						<ul>
							<li><code>assert</code> is used to test conformance (it outputs errors)</li>
							<li><code>report</code> is used to report observations (it outputs messages)</li>
							<li>Schematron's processing model is underspecified (check assertions, print outputs)</li>
						</ul>
						<li>Schematron is useful for reporting to humans</li>
						<ul>
							<li>machine-oriented environments need a better processing model</li>
							<li>using Schematron as a starting point could be a good way to start</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Report Example</title>
					<listing src="address-report.sch" line="2-12"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Validation is Good</title>
				<ul>
					<li>Validation is better than writing code</li>
					<li>Validation should be seen as a pipeline process</li>
					<li>RELAX NG can be a useful and simple substitute for XML Schema</li>
					<li>Schematron supports XPath-oriented constraints for XML documents</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="xdbms" cover="slidycover">
		<title short="XML &amp; DB">XML and Database Systems</title>
		<date short="2006-10-19">Thursday, October 19, 2006</date>
		<toc id="reading"><a href="http://www.rpbourret.com/xml/XMLAndDatabases.htm">XML and Databases</a></toc>
		<toc id="resources"><a href="http://www.research.ibm.com/journal/sj/414/reinwald.pdf">XML Programming with SQL/XML and XQuery</a>; <a href="http://www.sqlx.org/">SQL/XML</a>; <a href="http://www.w3.org/XML/Query/">XML Query</a></toc>
		<toc id="abstract">XML is the most popular data format for exchanging data, but the majority of data within applications and closed systems is still stored in <em>Relational Database Management Systems (RDBMS)</em>. This leads to two main issues, the first one being how moving data between XML formats and RDBMS can be done easily and efficiently, so that moving data between these two worlds can be done as easily as possible. The second issue is how to map the data models between these two worlds. Relational data can easily be represented in XML, because tables can be easily represented in trees. Things can be more complicated in the other direction, because arbitrary XML can be hard to store in a relational database. For XML-centric scenarios, <em>XML Database Management Systems (XDBMS)</em> are an interesting alternative, which provide XML-specific query capabilities with <em>XML Query (XQuery)</em>.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>XML is Trees</title>
			<ul>
				<li>XML documents are trees</li>
				<ul>
					<li>applications may have different internal data models (mapped to trees for interfacing)</li>
					<li>the exchange and processing of XML documents is tree-based</li>
				</ul>
				<li>Where and how is XML being used?</li>
				<ul>
					<li>as a pure transfer syntax (Web Services very often are used like this)</li>
					<li>as artifacts that have a longer lifespan (archiving of XML business documents)</li>
					<li>as the application's data model (there is nothing but XML)</li>
				</ul>
				<li>XML usage results in very different requirements for XML tools</li>
				<ul>
					<li>Web Service programmers often never see the tree</li>
					<li>archived XML documents need to be searchable</li>
					<li>XML-centric applications need to store XML efficiently</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Storing XML</title>
			<ul>
				<li>XML documents are text files</li>
				<ul>
					<li>they can be stored in file systems (they are <q>self-describing</q>)</li>
					<li>they can be retrieved by searching through the file system</li>
				</ul>
				<li>File systems are not designed to store millions of documents</li>
				<ul>
					<li>standard file system implementations usually slow down dramatically</li>
					<li>standard procedures (backup/restore/concurrency) do not work well</li>
				</ul>
				<li>Problems with <q>File Systems as XML Databases</q></li>
				<ul>
					<li>the number of documents is too large</li>
					<li>there is no structured access (<a href="http://sourceforge.net/projects/xpsh">XPath Shell (XPsh)</a> provides an <q>XML-<code>find/grep</code></q>)</li>
					<li>there is no access optimization (XPsh is very slow)</li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>XML and Databases</title>
			<slide>
				<title>Data needs Databases</title>
				<ul>
					<li>Databases are what should be used for data storage</li>
					<ul>
						<li>they provide much better querying and performance than file systems</li>
						<li>databases are the foundation of all IT systems</li>
					</ul>
					<li>Databases are designed around a set of assumptions</li>
					<ul>
						<li>data must adhere to the data model of the database</li>
						<li>databases can only work well if the data is modeled well</li>
						<li><link href="data-modeling"/> for databases is an essential part of an IT infrastructure</li>
					</ul>
					<li>Data modeling is based on a <em>meta model</em></li>
					<ul>
						<li>a model can only represent what is supported by the underlying modeling language</li>
						<li>XML is a model in itself: partially ordered trees with typed nodes (and constraints)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Model Mismatches</title>
				<ul>
					<li>Simple models can be expressed in rich metamodels</li>
					<ul>
						<li>features not required for the model can be ignored</li>
						<li>the simple model can be expressed using a subset of the metamodel</li>
						<li>e.g., DTDs can be expressed in the XML Schema metamodel</li>
					</ul>
					<li>Complex models cannot be (adequately) expressed in simple metamodels</li>
					<ul>
						<li>if the model is richer than the metamodel, information is lost</li>
						<li>the unsupported parts can be <em>piggybacked</em> onto other mechanisms</li>
						<li>piggybacking technically works, but it is brittle, inefficient, and <em>a sign</em></li>
						<li>e.g., XML Schema's type information cannot be expressed in DTD</li>
						<li>parts of it can be mapped to <link href="param-entity"/></li>
					</ul>
				</ul>
			</slide>
			<part id="rdbms">
				<title>Relational Databases</title>
				<slide>
					<title>Generic XML Storage</title>
					<ul>
						<li>Relational databases have been the state of the art since 1976</li>
						<ul>
							<li>this is long enough to build highly optimized and robust systems</li>
							<li>this is long enough to have ER hard-wired into some brains</li>
						</ul>
						<li>XML is more powerful than ER</li>
						<ul>
							<li>repetitions of elements do not map well</li>
							<li>choices do not map well</li>
							<li>ordered content does not map well</li>
							<li>mixed content does not map well</li>
						</ul>
						<li>Storing XML in a relational database is hard</li>
						<ul>
							<li>it can be done by piggybacking structural information as content</li>
							<li>using the resulting structures is awkward and very inefficient</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Tree Table</title>
					<table width="90%" style="margin : 4%">
						<tr>
							<td valign="middle" align="center">
								<img style="width : 90% ; margin : 2% ; " src="tree-table.png"/>
							</td>
							<td valign="middle" align="center">
								<table style="width : 90% ; " border="1">
									<tr>
										<th>ID</th>
										<th>Type</th>
										<th>Name</th>
										<th>Value</th>
										<th>Parent</th>
										<th>Left</th>
									</tr>
									<tr>
										<td>1</td>
										<td>Root</td>
										<td></td>
										<td></td>
										<td></td>
										<td></td>
									</tr>
									<tr>
										<td>2</td>
										<td>Element</td>
										<td>a</td>
										<td></td>
										<td>1</td>
										<td></td>
									</tr>
									<tr>
										<td>3</td>
										<td>Element</td>
										<td>b</td>
										<td></td>
										<td>2</td>
										<td></td>
									</tr>
									<tr>
										<td>4</td>
										<td>Element</td>
										<td>c</td>
										<td></td>
										<td>2</td>
										<td>3</td>
									</tr>
									<tr>
										<td>5</td>
										<td>Text</td>
										<td></td>
										<td><q>Text</q></td>
										<td>3</td>
										<td></td>
									</tr>
									<tr>
										<td>6</td>
										<td>Attribute</td>
										<td>att</td>
										<td><q>42</q></td>
										<td>4</td>
										<td></td>
									</tr>
								</table>
							</td>
						</tr>
					</table>
				</slide>
			</part>
			<part>
				<title>Database Support for XML</title>
				<slide>
					<title>Why XML and Databases?</title>
					<ul>
						<li>XML is constantly getting more popular</li>
						<ul>
							<li>XML as a document format was first used as <em>wire format</em> only</li>
							<li>instead of parsing manually, parser interfaces provide better XML support</li>
							<li><em>data binding frameworks</em> bind XML even more tightly into applications</li>
							<li>if all programs somehow <q>hide the XML</q>, why not work on XML directly?</li>
						</ul>
						<li>What is XML for an application?</li>
						<ul>
							<li>an (increasingly popular) way to represent the data?</li>
							<li>the data itself?</li>
							<li>currently, the representation perspective is more popular</li>
							<li>as XML is increasingly penetrating applications, this may change</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML Interchange</title>
					<img style="height : 70% ; margin : 2% ; " src="xml-dbms-application.png"/>
				</slide>
				<slide id="xdbms-dbms-xmlsupport">
					<title>XML Support in DBMS</title>
					<img style="height : 70% ; margin : 2% ; " src="xml-dbms-xmlsupport.png"/>
				</slide>
				<slide>
					<title>XML DBMS</title>
					<img style="height : 70% ; margin : 2% ; " src="xml-dbms-xdbms.png"/>
				</slide>
			</part>
			<part>
				<title>XML Storage in Databases</title>
				<slide>
					<title>Model Mapping</title>
					<ul>
						<li>Relational databases are not good tools for storing XML</li>
						<ul>
							<li>they might be appropriate if the schema disallows problematic constructs</li>
							<li>they often are already deployed and applications must live with them</li>
						</ul>
						<li>If the data model is ER-oriented, relational databases are good tools</li>
						<ul>
							<li>XML may be invisible from the model point of view</li>
							<li>parts of the model may be encoded as an XML schema</li>
						</ul>
						<li>If the XML is not visible in the model, it can be structurally inaccessible</li>
						<ul>
							<li>e.g., a product catalog may contain product descriptions in XHTML rich text snippets</li>
							<li>for managing the product catalog data, the XHTML is not relevant</li>
						</ul>
						<li>If the XML is part of the model, it should be accessible structurally</li>
						<ul>
							<li>if the product catalog XHTML contains links to other products, these links are important</li>
							<li>they could be extracted (creating redundant and hard to maintain data)</li>
							<li>if they are hidden in the XHTML, all XHTML snippets have to be parsed</li>
							<li>ideally, the database should be able to <q>query the XHTML snippet</q></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML is Text</title>
					<ul>
						<li>XML documents can be stored as text</li>
						<ul>
							<li>databases typically have various datatypes for text storage</li>
							<li>if the database supports Unicode, any XML document can be stored</li>
						</ul>
						<li>The XML structure is completely invisible to the database</li>
						<ul>
							<li>working with the XML requires querying and parsing the XML text</li>
							<li>this kind of storage does not allow any querying of the XML content</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML → ∗LOB</title>
					<img style="width : 90% ; margin : 2% ; " src="xml-storage-lob.png"/>
				</slide>
				<slide id="xdbms-xmldatatype">
					<title>XML as a Datatype</title>
					<ul>
						<li>SQL supports a wide variety of datatypes</li>
						<ul>
							<li>typed values are better than untyped values (they enable type-specific operations)</li>
							<li>XML can be regarded as just another data type</li>
						</ul>
						<li>Introducing a datatype lets the database recognize the data</li>
						<ul>
							<li>XML data can be stored in some format (a <q>persistent DOM</q>)</li>
							<li>databases can provide functionality avoiding parsing/serialization (DOM-based)</li>
						</ul>
					</ul>
				</slide>
				<slide id="xdbms-xmltype">
					<title>XML Datatype</title>
					<img style="width : 90% ; margin : 2% ; " src="xml-storage-datatype.png"/>
				</slide>
				<slide>
					<title>Mapping XML to Models</title>
					<ul>
						<li>Model-relevant data must be mapped to the database structures</li>
						<ul>
							<li>this assumes there is an ER-model which describes the database structure</li>
							<li>mapping XML is easy by definition because the XML is ER-compliant</li>
						</ul>
						<li>Is the data accessed as table data?</li>
						<ul>
							<li>if shredded data is only used to assemble it again, it is just performance overhead</li>
							<li>if shredded data is accessed relationally, then shredding makes sense</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Shredding (XML → Columns)</title>
					<img style="width : 90% ; margin : 2% ; " src="xml-storage-shredding.png"/>
				</slide>
				<slide>
					<title>XML as First-Class Citizen</title>
					<ul>
						<li>The <link href="xdbms-xmltype"/> defines XML as a sub-concept of ER</li>
						<ul>
							<li>the overall structure of the database is relational</li>
							<li>attributes may be of type XML, which means storing trees in tables</li>
						</ul>
						<li>Tables are not the only way to see the world</li>
						<ul>
							<li>XML trees are an <em>alternative</em> to tables, not a <em>datatype</em></li>
							<li>XML-centric applications should not be forced to use tables at all</li>
						</ul>
						<li>XML can be regarded as replacing the ER-concept altogether</li>
						<ul>
							<li>the database simply stores XML documents</li>
							<li>applications can store, query, update, and manage XML documents in the database</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XML DBMS</title>
					<img style="width : 90% ; margin : 2% ; " src="xml-storage-xdbms.png"/>
				</slide>
			</part>
		</part>
		<part>
			<title>XML in Relational Databases</title>
			<slide>
				<title>RDBish XML</title>
				<ul>
					<li>XML schemas can be designed with databases in mind</li>
					<ul>
						<li>avoid unbounded repetitions of elements</li>
						<li>avoid choices</li>
						<li>avoid ordered content</li>
						<li>avoid mixed content</li>
					</ul>
					<li>Many XML schemas are designed RDBish for compatibility reasons</li>
					<ul>
						<li>it was decided that the XML should enable an easy mapping to relational structures</li>
						<li>the person designing the schema has an ER-structured brain</li>
						<li>the schema has been generated from a relational database schema</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Problematic XML</title>
				<ul>
					<li>XML in its full glory is too much for tables</li>
					<ul>
						<li>XML has been developed as a document format</li>
						<li>XML is about hierarchy (which <em>intentionally</em> has been left out of ER)</li>
						<li>XML is about highly irregular structures</li>
					</ul>
					<li>XML often is said to have two <q>flavors</q></li>
					<ul>
						<li><em>data-oriented XML</em>: regular data which can be easily mapped to tables</li>
						<li><em>document-oriented XML</em>: irregular structures which are hard to map to tables</li>
						<li>real-world XML often is a bit of both (e.g., <em>content</em> and <em>metadata</em>)</li>
					</ul>
					<li>Hybrid approaches sometimes are a good solution</li>
					<ul>
						<li>the data-oriented part can be shredded and stored in tables</li>
						<li>the document-oriented rest is stored as one object (text or <code>XML</code>)</li>
					</ul>
				</ul>
			</slide>
			<part id="sqlxml">
				<title>SQL/XML</title>
				<slide>
					<title>SQL/XML:2003</title>
					<ul>
						<li>SQL/XML provides <link href="xdbms-dbms-xmlsupport"/>s</li>
						<ul>
							<li>it introduces <link href="xdbms-xmldatatype"/></li>
							<li>it introduces a number of operations for generating XML from query results</li>
							<li>it defines mappings to bridge both worlds (SQL and XML)</li>
						</ul>
						<li>SQL/XML does not change anything about the database model</li>
						<ul>
							<li>data is still stored in tables only</li>
							<li>a column of a table may use the <code>XML</code> type</li>
							<li>queries may return results in XML rather than as SQL result sets</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>SQL/XML Example</title>
					<pre>SELECT
	e.EmpId,
	e.FirstName,
	e.LastName,
	e.StartDate,
	e.EndDate
FROM Employees e WHERE e.EmpId = 12</pre>
				<pre>SELECT
	XMLELEMENT (NAME "employee",
		XMLATTRIBUTES(e.EmpId as "id"),
		XMLELEMENT(NAME "names",
			XMLELEMENT(NAME "first", e.FirstName),
			XMLELEMENT(NAME "last", e.LastName)),
		XMLELEMENT(NAME "hire-dates",
			XMLATTRIBUTES(e.StartDate as "start", e.EndDate as "end")))
FROM Employees e WHERE e.EmpId = 12</pre>
				</slide>
				<slide>
					<title>SQL/XML:2007</title>
					<ul>
						<li>Adds the concept of <em>XML Tables</em></li>
						<li>XML Tables are not tables, they are containers for XML</li>
						<li>SQL/XML:2007 changes the database's data model</li>
						<ul>
							<li>it is now possible to have a database with <q>no tables</q></li>
							<li>likely use cases are to have both: traditional and XML tables</li>
						</ul>
						<li>SQL/XML:2007 defines a hybrid database: relational and XML database</li>
					</ul>
				</slide>
			</part>
		</part>
		<part id="xdbms-databases">
			<title>XML Databases</title>
			<slide id="xdbms-xdm">
				<title>Storing XML</title>
				<ul>
					<li>XML documents are text documents</li>
					<li>Infosets are abstractions of XML documents (information loss)</li>
					<li>XPath node trees are abstractions of Infosets (more information loss)</li>
					<li>XML Schema adds useful information to an Infoset (type annotations)</li>
					<li><em>XQuery 1.0 and XPath 2.0 Data Model (XDM)</em></li>
					<ul>
						<li>a new creation of the W3C for XQuery and XPath (2.0)</li>
						<li>Infoset + Types + Sequences</li>
						<li>considerably more complicated than the XPath 1.0 node tree model</li>
					</ul>
				</ul>
			</slide>
			<slide id="xdbms-xquery">
				<title>XML Query Language (XQuery)</title>
				<ul>
					<li>XQuery is designed for querying XML databases</li>
					<ul>
						<li>XQuery works on a set of documents</li>
						<li>it returns results as XDM instances (sequences or XML documents)</li>
					</ul>
					<li>XQuery is built on top of XPath 2.0</li>
					<ul>
						<li>XPath 2.0 is a much more powerful language than XPath 1.0</li>
						<li>XPath 2.0 is still limited to selecting parts of an XML document</li>
						<li>XQuery provides facilities to work on multiple documents</li>
						<li>XQuery provides facilities to construct results</li>
					</ul>
					<li>XQuery is comparable to XSLT 2.0</li>
					<ul>
						<li>both are built on top of XPath 2.0</li>
						<li>both are easy to learn if you already know XPath 2.0</li>
						<li>both can be used to process documents to yield results</li>
						<li>XSLT is for programmers, XQuery is for SQL-users</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XQuery Example</title>
				<listing src="videodb.xml" line="220-226"/>
				<listing src="videodb.xml" line="31-33"/>
				<listing src="videos.xql"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Tables and Trees don't Mix</title>
				<ul>
					<li>Tables and trees are different data models</li>
					<li>Different technologies are used to handle these different models</li>
					<li>Think before choosing the wrong tool</li>
				</ul>
			</slide>
			<slide>
				<title>Database Technologies do Mix</title>
				<ul>
					<li>Relational databases are good tools for regular data</li>
					<li>XML databases are good tools for document-oriented XML</li>
					<li>SQL/XML:2007 defines a database that does both</li>
					<li>Applications can choose the best mix of tables and trees</li>
				</ul>
			</slide>
		</part>
	</presentation>
	<presentation id="trends" cover="slidycover">
		<title short="XML Trends">XML Trends &amp; Developments</title>
		<date short="2006-10-24">Tuesday, October 24, 2006</date>
		<toc id="reading"/>
		<toc id="resources"><a href="http://www.w3.org/XML/Activity">W3C XML Activity Statement</a></toc>
		<toc id="abstract">XML is a very basic technology for representing trees using a standardized markup-based syntax. An increasing number of technologies are building on this foundation, creating an expanding field of XML-based technologies for interoperability in many different fields. Application-specific XML-based data formats are used in many different settings, and the best data format for a given scenario depends on the existing formats in this area and the exact requirements. More interestingly, generic XML technologies which can be applied in many different settings make it easier for developers and system integrators to achieve their goal of making systems interoperate.</toc>
		<slide id="abstract">
			<title>Abstract</title>
			<p class="abstract"><toc id="abstract"/></p>
		</slide>
		<slide>
			<title>Course Evaluation</title>
			<ul>
				<li>15min to fill out the evaluation forms</li>
				<li>Please take your time and make detailed comments</li>
				<li>Your comments help to improve the course next year</li>
			</ul>
		</slide>
		<part id="webservices">
			<title>Web Services</title>
			<slide>
				<title>XML-Based Distributed Programming</title>
				<ul>
					<li>XML exchanges often have to be negotiated in advance</li>
					<ul>
						<li>the transport mechanism needs to be defined</li>
						<li>the schema(s) need to be defined</li>
						<li>the possible interactions between peers need to be defined</li>
					</ul>
					<li>Web Services are a well-defined environment for XML exchanges</li>
					<li>Two very different approaches to XML-based distributed programming</li>
					<ol>
						<li>instead of programming-language specific mechanisms, have your components talk to each other in XML</li>
						<li>instead of simply wrapping APIs in XML, redesign your IT landscape into loosely coupled systems</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Web Service Technologies</title>
				<ul>
					<li><em>Simple Object Access Protocol (SOAP)</em></li>
					<ul>
						<li>SOAP messages have an <em>envelope</em> for <q>Web Service Metadata</q></li>
						<li>SOAP messages have a <em>body</em> containing the actual payload</li>
						<li>non-XML data can be attached in the same way as for e-mail messages</li>
					</ul>
					<li><em>Web Service Description Language (WSDL)</em> for describing SOAP-based services</li>
					<ul>
						<li>the payload format must be known</li>
						<li>the different messages that may be sent must be known</li>
						<li>the transport mechanism must be known</li>
						<li>the address to send the SOAP message to must be known</li>
					</ul>
					<li><em>Universal Description, Discovery, and Integration (UDDI)</em> for making WSDL available</li>
					<ul>
						<li>UDDI is intended to be a repository for WSDL descriptions</li>
						<li>UDDI is not a global service like the DNS</li>
						<li>UDDI is modeled after <em>yellow pages</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>SOAP Example Message</title>
				<listing src="soap-example.xml"/>
			</slide>
			<slide>
				<title>WSDL Example (Google)</title>
				<listing src="GoogleSearch.wsdl" line="89-102"/>
			</slide>
			<slide>
				<title>UDDI Data Model</title>
				<img style="height : 70% ; margin : 2% ; " src="uddi-datamodel.gif"/>
			</slide>
		</part>
		<part id="xforms">
			<title>XForms</title>
			<slide>
				<title>HTML Forms Limitations</title>
				<ul>
					<li>HTML forms are very popular for data entry</li>
					<ul>
						<li>many Web-based applications use HTML forms as their interface</li>
						<li>the features offered by HTML forms are very poor</li>
					</ul>
					<li>HTML forms have a lot of limitations</li>
					<ul>
						<li>they cannot check datatypes (fields are always strings)</li>
						<li>they cannot create new fields (if data entry requires repeatable fields)</li>
						<li>they only work in HTML (integral part of the HTML language)</li>
					</ul>
					<li>Workarounds for better Web-based applications are possible</li>
					<ul>
						<li>JavaScript can be used to provide additional functionality</li>
						<li>server-side engines can provide a back-end for better forms</li>
						<li>writing accessible, portable, and usable forms is a challenge</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>XForms</title>
				<ul>
					<li>XML is the most ubiquitous data format on the Web</li>
					<ul>
						<li>there is no generally supported way to edit or produce XML data</li>
						<li>forms should be XML-based rather than being based on HTTP/MIME</li>
					</ul>
					<li>XForms define an XML-based model for data editing and input</li>
					<ul>
						<li>they are separating content from presentation</li>
						<li>clients are free in their choice of data presentation and acquisition</li>
						<li>XForms provide an XML-in, XML-out model of data handling</li>
						<li>XForms can be implemented server- or client-based</li>
					</ul>
					<li>Client-based XForms require <a href="http://www.mozilla.org/projects/xforms/">browser support</a></li>
					<li>Server-based XForms require XForms↔DHTML mappings</li>
				</ul>
			</slide>
			<slide>
				<title>XForms Limitations</title>
				<ul>
					<li>XForms are good for data-oriented XML</li>
					<ul>
						<li>regularly structured data</li>
						<li>no mixed content</li>
					</ul>
					<li>XForms are inappropriate for document-oriented XML</li>
					<ul>
						<li>irregularly structured data is not well-supported</li>
						<li>mixed content is not supported at all</li>
					</ul>
					<li>XForms is for forms, it is not a general XML editing facility</li>
					<ul>
						<li>XML editors often need a lot of customization</li>
						<li>there is no standards-based way for general-purpose XML editing</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="xpath20">
			<title>XPath 2.0</title>
			<slide>
				<title>XML Databases</title>
				<ul>
					<li><link href="xdbms"/> is a very active field</li>
					<ul>
						<li>XML users want to store their XML in something better than a file system</li>
						<li>there must be a way to retrieve XML from this storage</li>
						<li>an XML-specific query language would be the ideal tool</li>
					</ul>
					<li><link href="xpath"/> is a good foundation for a query language</li>
					<ul>
						<li>XPath 1.0 is too simple to be useful (and has no type support)</li>
						<li>XPath 2.0 is a much bigger language (many more functions)</li>
						<li>XPath 2.0 is a more powerful language (more expressive power)</li>
					</ul>
					<li>XPath 2.0 is the new foundation for XML technologies</li>
					<ul>
						<li><link href="xslt20"/> is an improved version of <link href="xslt1">XSLT 1.0</link></li>
						<li><link href="xquery"/> is a query language for XML</li>
						<li>both languages are built on top of XPath 2.0</li>
					</ul>
				</ul>
			</slide>
			<part id="xslt20">
				<title>XSLT 2.0</title>
				<slide>
					<title>XSLT Improvements</title>
					<ul>
						<li>XSLT 2.0 has grown by around 40%</li>
						<li>Some of the most limiting aspects have been removed</li>
						<ul>
							<li>grouping is now part of the language (iterate over groups of nodes)</li>
							<li>stylesheets can now produce more than one result document</li>
							<li>stylesheets can now read (and tokenize) text files</li>
						</ul>
						<li>XSLT 2.0 can be used in conjunction with XML Schema</li>
						<ul>
							<li>the input document is validated and type-annotated</li>
							<li>the result document is <q>validated</q> while being constructed</li>
						</ul>
					</ul>
					<pre>&lt;!-- tokenize the input file by lines and output them as newline-separated list of strings. -->
&lt;xsl:variable name="listing" select="string-join(tokenize(unparsed-text($fileuri, 'UTF-8'), '\r?\n'), '&amp;#xa;')"/>
&lt;xsl:value-of select="if (@tab eq 'retain') then $listing else replace($listing, '\t', ' ')"/></pre>
				</slide>
				<slide>
					<title>XPath vs. XSLT</title>
					<ul>
						<li>XPath 2.0 has grown by around 70%</li>
						<li>XPath 2.0 has many more features than XPath 1.0</li>
						<li>More problems can be solved in XPath directly</li>
						<li>XSLT programming is more powerful and more challenging</li>
						<ul>
							<li>more ways to solve the same problem</li>
							<li>favoring XPath over XSLT is a matter of style (and robustness and maintainability)</li>
						</ul>
					</ul>
					<pre>&lt;xsl:value-of select="if ( @gender = 'male' ) then 'Sir' else 'Madam'"/></pre>
					<pre>&lt;xsl:choose>
  &lt;xsl:when test="@lang = 'en'">English&lt;/xsl:when>
  &lt;xsl:when test="@lang = 'de'">Deutsch&lt;/xsl:when>
  &lt;xsl:when test="@lang = 'fr'">Français&lt;/xsl:when>
  &lt;xsl:otherwise>n/a&lt;/xsl:otherwise>
&lt;/xsl:choose></pre>
				</slide>
			</part>
			<part id="xquery">
				<title>XQuery</title>
				<slide>
					<title>XML Query Language (XQuery)</title>
					<ul>
						<li>XQuery is designed for querying XML databases</li>
						<ul>
							<li>XQuery works on a set of documents</li>
							<li>it returns results as XDM instances (sequences or XML documents)</li>
						</ul>
						<li>XQuery is built on top of XPath 2.0</li>
						<ul>
							<li>XPath 2.0 is a much more powerful language than XPath 1.0</li>
							<li>XPath 2.0 is still limited to selecting parts of an XML document</li>
							<li>XQuery provides facilities to work on multiple documents</li>
							<li>XQuery provides facilities to construct results</li>
						</ul>
						<li>XQuery is comparable to XSLT 2.0</li>
						<ul>
							<li>both are built on top of XPath 2.0</li>
							<li>both are easy to learn if you already know XPath 2.0</li>
							<li>both can be used to process documents to yield results</li>
							<li>XSLT is for programmers, XQuery is for SQL-users</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>XQuery Example</title>
					<listing src="videodb.xml" line="220-226"/>
					<listing src="videodb.xml" line="31-33"/>
					<listing src="videos.xql"/>
				</slide>
			</part>
		</part>
		<part id="semweb">
			<title>Semantic Web</title>
			<slide>
				<title>XML is Syntax</title>
				<ul>
					<li>XML facilitates the exchange of trees</li>
					<li>XML schema languages define constraints for trees</li>
					<li>The meaning of the data encoded in the tree is unclear</li>
					<ul>
						<li>XML has no semantics (with the exception of <attr>xml:lang</attr>)</li>
						<li>semantics have to be agreed upon before cooperation is possible</li>
						<li>XML relies on other mechanisms (documentation, formal models)</li>
					</ul>
				</ul>
				<listing src="japanese2.xml"/>
			</slide>
			<slide>
				<title>Semantics</title>
				<ul>
					<li>Semantics can be defined in <em>ontologies</em></li>
					<ul>
						<li>ontologies are a formalization of a conceptualization</li>
					</ul>
					<li>By referring to ontologies, cooperation can use shared semantics</li>
					<ul>
						<li>of course, this only works if people first agree on the ontology</li>
						<li>domain specialists build ontologies, which are then used for semantics</li>
					</ul>
					<li>Semantic Web technologies revolve around the idea of ontologies</li>
					<ul>
						<li><em>Resource Description Framework (RDF)</em> annotations describe resources semantically</li>
						<li>the ontology is defined using the <em>Web Ontology Language (OWL)</em></li>
						<li>all kinds of AI-style applications are possible using formalized semantics</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>XML is Growing</title>
				<ul>
					<li>XML is the foundation for structured information</li>
					<li>XML is getting closer to programming languages</li>
					<li>XML is becoming the standard toolset for any kind of structured information</li>
					<li>XML itself is simple, but using XML wisely is not always simple</li>
					<li>Schemas and documents may live very long, so plan ahead and choose wisely</li>
				</ul>
			</slide>
			<slide>
				<title>Discussion</title>
				<ul>
					<li>Syntax vs. Model (i.e., brackets vs. Infoset)</li>
					<li>XSLT before XML Schema</li>
					<li>Schema languages (XML Schema vs. alternatives)</li>
					<li>XSLT 2.0 and XPath 2.0 (and XQuery)</li>
					<li>Course length vs. content (full-semester course?)</li>
				</ul>
			</slide>
		</part>
	</presentation>
</xslidy>