XML File Format (DTD)

XML File Format (DTD)

The structure of the XML format is defined in the following DTD file:
        <!--
        This is the DTD for "IAM Handwriting Database" metadata.
        The system-id is 'http://www.iam.unibe.ch/~fki/iamDB/form-metadata.dtd', there
        is no public-id yet. The java classes map the system-id to the java resource
        ch/unibe/iam/handwirtingDB/form-metadata.dtd in the handwirtingDB jar-file.
        -->
        <!--
        Form: a machine-printed-part followed by a handwritten-part.
                created:                creation date, format yyyy-mm-dd
                height:                 height of image data, in pixel
                id:                     identification number of form, format /[a-z]\d\d\d-\d\d\d\w?/
                last-modified:          modification date, format yyyy-mm-dd
                skew:                   form skew, in 1000th degree
                status:                 form status, fixed to "final" (the other enumerated
                                                values are declared but cannot be used while
                                                the attribute is #FIXED)
                version:                version, is "3.0_beta" or "3.0"
                width:                  width of image data, in pixel
                writer-id:              writer identification, a number
                select:                 optional attribute; meaning not documented
                                                here (TODO confirm)
        -->
        <!ELEMENT form (machine-printed-part, handwritten-part)>
        <!ATTLIST form
                created                 CDATA #REQUIRED
                height                  CDATA #REQUIRED
                id                      ID      #REQUIRED
                last-modified           CDATA #REQUIRED
                skew                    CDATA #REQUIRED
                status                  (final|verified|segmented|raw) #FIXED "final"
                version                 (3.0_beta|3.0) #REQUIRED
                width                   CDATA #REQUIRED
                writer-id               CDATA #REQUIRED
                select                  CDATA #IMPLIED
        >
        <!--
        Machine Printed Part: one or more machine-print-line elements.
        Attributes of machine-print-line (an empty element):
                text:                   content of line, a string
        -->
        <!ELEMENT machine-printed-part (machine-print-line)+>
        <!ELEMENT machine-print-line EMPTY>
        <!ATTLIST machine-print-line
                text                    CDATA #REQUIRED
        >
        <!--
        Handwritten Part: one or more line elements.
        -->
        <!ELEMENT handwritten-part (line)+>
        <!--
        Line: one or more word elements, optionally followed by an
        upper-contour and then a lower-contour.
                ass:                    ascender slope, 1000th degree
                asx:                    ascender x value, is "0"
                asy:                    ascender y value
                character-width:        average character width (unused), is "999"
                dss:                    descender slope, 1000th degree
                dsx:                    descender x value, is "0"
                dsy:                    descender y value
                fd0:                    fractal dimension 0 (see master of Caroline Hertel), optional
                fd1:                    fractal dimension 1, optional
                fd2:                    fractal dimension 2, optional
                filter-width:           filter width (meaning unknown ???)
                id:                     identifier, format is: identifier of enclosing
                                                form "-" line number (starting with 0)
                lbs:                    lower baseline slope, 1000th degree
                lbx:                    lower baseline x value, is "0"
                lby:                    lower baseline y value
                segmentation:           result of word segmentation, is "err" or "ok"
                slant:                  slant, 1000th degree
                stroke-width:           average stroke width, in pixel
                threshold:              threshold of binarization
                text:                   content of line, a string
                ubs:                    upper baseline slope, 1000th degree
                ubx:                    upper baseline x value, is "0"
                uby:                    upper baseline y value
        -->
        <!ELEMENT line ((word)+, (upper-contour)?, (lower-contour)?)>
        <!ATTLIST line
                ass                     CDATA #REQUIRED
                asx                     CDATA #FIXED "0"
                asy                     CDATA #REQUIRED
                character-width         CDATA #FIXED "999"
                dss                     CDATA #REQUIRED
                dsx                     CDATA #FIXED "0"
                dsy                     CDATA #REQUIRED
                fd0                     CDATA #IMPLIED
                fd1                     CDATA #IMPLIED
                fd2                     CDATA #IMPLIED
                filter-width            CDATA #REQUIRED
                id                      ID    #REQUIRED
                lbs                     CDATA #REQUIRED
                lbx                     CDATA #FIXED "0"
                lby                     CDATA #REQUIRED
                segmentation            (ok|err) #REQUIRED
                slant                   CDATA #REQUIRED
                stroke-width            CDATA #REQUIRED
                threshold               CDATA #REQUIRED
                text                    CDATA #REQUIRED
                ubs                     CDATA #REQUIRED
                ubx                     CDATA #FIXED "0"
                uby                     CDATA #REQUIRED
        >
        <!-- Word: zero or more cmp (component) elements.
                id:                     identifier, format is: identifier of enclosing
                                                line "-" word number (starting with 0)
                sentence-start:         initial-sentence-marker, is "yes" or "no"
                                                (defaults to "no" when omitted)
                tag:                    type of word, as defined by ??? standard
                text:                   value of word, a string
        -->
        <!ELEMENT word (cmp)*>
        <!ATTLIST word
                id                      ID #REQUIRED
                sentence-start          (yes|no) "no"
                tag                     CDATA #REQUIRED
                text                    CDATA #REQUIRED
        >
        <!-- Component (cmp, an empty element; all attributes are required)
                x:                      x position
                y:                      y position
                width:                  width, in pixel
                height:                 height, in pixel
        -->
        <!ELEMENT cmp EMPTY>
        <!ATTLIST cmp
                x                       CDATA #REQUIRED
                y                       CDATA #REQUIRED
                width                   CDATA #REQUIRED
                height                  CDATA #REQUIRED
        >
        <!-- Contour Data: upper-contour and lower-contour each contain one or
             more point elements. Attributes of point (an empty element):
                x:                      x position
                y:                      y position
        -->
        <!ELEMENT upper-contour (point)+>
        <!ELEMENT lower-contour (point)+>
        <!ELEMENT point EMPTY>
        <!ATTLIST point
                x                       CDATA #REQUIRED
                y                       CDATA #REQUIRED
        >
Document Actions