Convert German laws from XML to text
For a small project, I needed to convert German laws, found at https://www.gesetze-im-internet.de/, from XML format to text format.
The XML format is described here and is defined by this DTD file.
The source code in the following XSL file is pretty straightforward. Only adding newlines and indenting definition lists posed an additional challenge.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> | |
<!-- Produce plain text output; whitespace-only text nodes are stripped from
     every source element before templates run. -->
<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>
<xsl:strip-space elements="*"/>

<!-- Shared output fragments used by the templates of this stylesheet. -->

<!-- A single line feed. Written as a character reference so the value does
     not depend on how this file is wrapped, re-indented or copied. -->
<xsl:variable name="newline"><xsl:text>&#10;</xsl:text></xsl:variable>

<!-- A single space character. -->
<xsl:variable name="space"><xsl:text> </xsl:text></xsl:variable>

<!-- One indentation step: four spaces. -->
<xsl:variable name="tab" select="concat($space, $space, $space, $space)"/>
<!-- Entry point for the dokumente root element: prints every langue value,
     then the selected norm elements, then a block of 25 trailing newlines. -->
<xsl:template match="/dokumente">
  <xsl:apply-templates select="norm/metadaten/langue"/>
  <!-- A norm is rendered when it has langue metadata together with textdaten,
       or when one of its enbez text nodes differs from 'Inhaltsübersicht'.
       The parentheses spell out the grouping XPath applies anyway
       ("and" binds tighter than "or"). -->
  <xsl:apply-templates select="norm[(metadaten/langue and textdaten) or metadaten/enbez/text() != 'Inhaltsübersicht']"/>
  <!-- 5 x 5 = 25 newlines of trailing padding. -->
  <xsl:variable name="fiveNewlines"
                select="concat($newline, $newline, $newline, $newline, $newline)"/>
  <xsl:value-of select="concat($fiveNewlines, $fiveNewlines, $fiveNewlines,
                               $fiveNewlines, $fiveNewlines)"/>
</xsl:template>
<!-- Prints the whitespace-normalized langue text followed by three newlines. -->
<xsl:template match="norm/metadaten/langue">
  <xsl:value-of select="normalize-space(.)"/>
  <xsl:value-of select="concat($newline, $newline, $newline)"/>
</xsl:template>
<!-- Renders a norm that has an enbez: one heading line, a blank line, then
     the norm's textdaten. -->
<xsl:template match="norm[metadaten/enbez]">
  <!-- Heading: enbez as-is, optionally followed by two spaces and the
       whitespace-normalized titel. -->
  <xsl:value-of select="metadaten/enbez"/>
  <xsl:if test="metadaten/titel">
    <xsl:value-of select="concat($space, $space, normalize-space(metadaten/titel))"/>
  </xsl:if>
  <xsl:value-of select="concat($newline, $newline)"/>
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!-- A norm with langue metadata and textdaten gets no heading line from this
     template; only its textdaten is rendered.
     NOTE(review): this pattern has the same default priority as
     norm[metadaten/enbez]; a norm matching both would be ambiguous. Confirm
     the input never combines langue and enbez in one norm. -->
<xsl:template match="norm[metadaten/langue and textdaten]">
  <xsl:apply-templates select="child::textdaten"/>
</xsl:template>
<!-- Renders text/Content, then (if a fussnoten child exists) a newline and
     fussnoten/Content, and finally two newlines. -->
<xsl:template match="textdaten">
  <xsl:apply-templates select="text/Content"/>
  <xsl:if test="boolean(fussnoten)">
    <!-- The separating newline is emitted whenever fussnoten is present,
         even if it has no Content child. -->
    <xsl:value-of select="$newline"/>
    <xsl:apply-templates select="fussnoten/Content"/>
  </xsl:if>
  <xsl:value-of select="$newline"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- P: process all child nodes, then end the line. -->
<xsl:template match="P">
  <xsl:apply-templates select="node()"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- DL: starts on a fresh line and processes its children. -->
<xsl:template match="DL">
  <xsl:value-of select="$newline"/>
  <xsl:apply-templates/>
  <!-- No trailing newline when the element three levels up is a DL, when the
       parent is a P, or when this DL is the last node in the current node
       list. -->
  <xsl:if test="not(name(../../..) = 'DL' or name(..) = 'P' or position() = last())">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!-- DT: prints the term indented by one tab per enclosing DL, followed by a
     single space.
     The previous fixed path test (../../../.. = DL) only recognised one level
     of nesting, so terms in third-level and deeper lists were indented like
     second-level ones. -->
<xsl:template match="DT">
  <!-- Extra indentation for every DL ancestor beyond the nearest one. On the
       reverse ancestor axis position 1 is the closest DL, i.e. the list this
       term belongs to; it is covered by the unconditional tab below. -->
  <xsl:for-each select="ancestor::DL[position() &gt; 1]">
    <xsl:value-of select="$tab"/>
  </xsl:for-each>
  <!-- Base indentation, the term's string value, and a separating space. -->
  <xsl:value-of select="concat($tab, ., $space)"/>
</xsl:template>
<!-- DD: processes its children; a newline follows unless this DD is the last
     node in the current node list. -->
<xsl:template match="DD">
  <xsl:apply-templates select="node()"/>
  <xsl:if test="position() &lt; last()">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!-- BR: a line break becomes a newline, except directly inside an entry
     element, where it becomes a single space. -->
<xsl:template match="BR">
  <xsl:choose>
    <xsl:when test="name(..) != 'entry'">
      <xsl:value-of select="$newline"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$space"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- TOC: intentionally produces no output; the element and everything below
     it are dropped. -->
<xsl:template match="TOC"/>
<!-- Title: process all child nodes, then end the line. -->
<xsl:template match="Title">
  <xsl:apply-templates select="child::node()"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- table: adds no output of its own; processing simply continues with the
     child nodes. -->
<xsl:template match="table">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- row: renders its entry children on one line, terminated by a newline. -->
<xsl:template match="row">
  <xsl:apply-templates select="child::entry"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- entry: processes the cell content; every entry except the last one in
     the current node list is followed by a tab (four spaces) as separator. -->
<xsl:template match="entry">
  <xsl:apply-templates select="node()"/>
  <xsl:if test="position() &lt; last()">
    <xsl:value-of select="$tab"/>
  </xsl:if>
</xsl:template>
</xsl:stylesheet> |