HDF5 documents and links 
Introduction to HDF5 
HDF5 User’s Guide 
HDF5 Reference Manual 

DDL in BNF for HDF5

1. Introduction

This document contains the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form.

2. Explanation of Symbols

This section contains a brief explanation of the symbols used in the DDL.
    ::=                      defined as
    <tname>                  a token with the name tname
    <a> | <b>                one of <a> or <b>
    <a>opt                   zero or one occurrence of <a>
    <a>*                     zero or more occurrences of <a>
    <a>+                     one or more occurrences of <a>
    [0-9]                    an element in the range between 0 and 9
    '['                      the token within the quotes (used for special characters)
    TBD                      To Be Decided

3. The DDL

<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }

<file_name> ::= <identifier>

<file_super_block> ::= SUPER_BLOCK {
                           SUPERBLOCK_VERSION <int_value>
                           FREELIST_VERSION <int_value>
                           SYMBOLTABLE_VERSION <int_value>
                           OBJECTHEADER_VERSION <int_value>
                           OFFSET_SIZE <int_value>
                           LENGTH_SIZE <int_value>
                           BTREE_RANK <int_value>
                           BTREE_LEAF <int_value>
                           ISTORE_K <int_value>
                           <super_block_filespace>
                           USER_BLOCK {
                               USERBLOCK_SIZE <int_value>
                           }
                       }

<super_block_filespace> ::= FILE_SPACE_STRATEGY <super_block_strategy>
                            FREE_SPACE_PERSIST <boolean_value>
                            FREE_SPACE_SECTION_THRESHOLD <int_value>
                            FILE_SPACE_PAGE_SIZE <int_value>

<super_block_strategy> ::= H5F_FSPACE_STRATEGY_FSM_AGGR | H5F_FSPACE_STRATEGY_PAGE |
                             H5F_FSPACE_STRATEGY_AGGR | H5F_FSPACE_STRATEGY_NONE |
                             Unknown strategy

<root_group> ::= GROUP "/" {
                     <anon_named_datatype>*
                     <object_id>opt
                     <group_comment>opt
                     <group_attribute>*
                     <group_member>*
                 }

<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type>

<anon_named_datatype> ::= DATATYPE <anon_named_type_name> {
                              <datatype>
                          }

<anon_named_type_name> ::= the name assigned to an anonymous named type, in
                           the form #oid, where oid is the object id
                           of the type

<atomic_type> ::= <integer> | <float> | <time> | <string> |
                  <bitfield> | <opaque> | <reference> | <enum>

<boolean_value> ::= FALSE | TRUE

<integer> ::= H5T_STD_I8BE     | H5T_STD_I8LE      |
              H5T_STD_I16BE    | H5T_STD_I16LE     |
              H5T_STD_I32BE    | H5T_STD_I32LE     |
              H5T_STD_I64BE    | H5T_STD_I64LE     |
              H5T_STD_U8BE     | H5T_STD_U8LE      |
              H5T_STD_U16BE    | H5T_STD_U16LE     |
              H5T_STD_U32BE    | H5T_STD_U32LE     |
              H5T_STD_U64BE    | H5T_STD_U64LE     |
              H5T_NATIVE_CHAR  | H5T_NATIVE_UCHAR  |
              H5T_NATIVE_SHORT | H5T_NATIVE_USHORT |
              H5T_NATIVE_INT   | H5T_NATIVE_UINT   |
              H5T_NATIVE_LONG  | H5T_NATIVE_ULONG  |
              H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG

<float> ::= H5T_IEEE_F32BE   | H5T_IEEE_F32LE     |
            H5T_IEEE_F64BE   | H5T_IEEE_F64LE     |
            H5T_NATIVE_FLOAT |  H5T_NATIVE_DOUBLE |
            H5T_NATIVE_LDOUBLE

<time> ::= H5T_TIME: not yet implemented

<string> ::= H5T_STRING {
                 STRSIZE <strsize>;
                 STRPAD <strpad>;
                 CSET <cset>;
                 CTYPE <ctype>;
             }

<strsize> ::= <int_value>

<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD

<cset> ::= H5T_CSET_ASCII | H5T_CSET_UTF8

<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1

<bitfield> ::= H5T_STD_B8BE     | H5T_STD_B8LE      |
               H5T_STD_B16BE    | H5T_STD_B16LE     |
               H5T_STD_B32BE    | H5T_STD_B32LE     |
               H5T_STD_B64BE    | H5T_STD_B64LE

<opaque> ::= H5T_OPAQUE {
                 OPAQUE_TAG <identifier>;
                 OPAQUE_SIZE <int_value>;opt
             }

<reference> ::= H5T_REFERENCE { <ref_type> }

<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG | H5T_STD_REF | UNDEFINED

<compound_type> ::= H5T_COMPOUND {
                        <member_type_def>+
                    }

<member_type_def> ::= <datatype> <field_name>;

<field_name> ::= <identifier>

<variable_length_type> ::= H5T_VLEN { <datatype> }

<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }

<dim_sizes> ::= '['<dimsize>']' | '['<dimsize>']'<dim_sizes>

<dimsize> ::= <int_value>

<attribute> ::= ATTRIBUTE <attr_name> {
                    <dataset_type>
                    <dataset_space>
                    <data>opt
                }

<attr_name> ::= <identifier>

<dataset_type> ::= DATATYPE <path_name> | <datatype>

<enum> ::= H5T_ENUM {
               <enum_base_type> <enum_def>+
           }

<enum_base_type> ::= <integer>
// Currently enums can only hold integer type data, but they may be expanded
// in the future to hold any datatype

<enum_def> ::= <enum_symbol> <enum_val>;

<enum_symbol> ::= <identifier>

<enum_val> ::= <int_value>

<path_name> ::= <path_part>+

<path_part> ::= /<identifier>

<dataspace> ::= <scalar_space> | <simple_space> | <complex_space> | <null_space>

<null_space> ::= NULL

<scalar_space> ::= SCALAR

<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }

<complex_space> ::= COMPLEX { <complex_space_definition> }

<dataset_space> ::= DATASPACE <path_name> | <dataspace>

<current_dims> ::= <dims>

<max_dims> ::= '(' <max_dim_list> ')'

<max_dim_list> ::= <max_dim> | <max_dim>, <max_dim_list>

<max_dim> ::= <int_value> | H5S_UNLIMITED

<data> ::= <subset> | <data_values>

<data_values> ::= DATA {
               <scalar_space_data> | <simple_space_data>
           }

<scalar_space_data> ::= <any_element>

<any_element> ::= <atomic_element> | <compound_element> |
                  <variable_length_element> | <array_element>

<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>

<atomic_element> ::= <integer_data> | <float_data>    | <time_data>   |
                     <string_data>  | <bitfield_data> | <opaque_data> |
                     <enum_data>    | <reference_data>

<subset> ::= SUBSET {
                 <start>;
                 <stride>;
                 <count>;
                 <block>;
                 DATA {
                     <simple_space_data>
                 }
             }

<start> ::= START (<coor_list>)

<stride> ::= STRIDE (<pos_list>)

<count> ::= COUNT (<max_dim_list>)

<block> ::= BLOCK (<max_dim_list>)

<coor_list> ::= <coor_data>, <coor_list> | <coor_data>

<coor_data> ::= <integer_data> | H5S_UNLIMITED

<integer_data> ::= <int_value>

<float_data> ::= a floating point number

<time_data> ::= DATA{ not yet implemented.}

<string_data> ::= a string
// A string is enclosed in double quotes.
// If a string is displayed on more than one line, the string concatenation
// operator '//' is used.

<bitfield_data> ::= <hex_value>

<opaque_data> ::= <hex_value>:<hex_value> | <hex_value>

<enum_data> ::= <enum_symbol>

<reference_data> ::= <object_ref_data> | <data_region_data> | <attribute_data> | NULL

<object_ref_data> ::= <object_type> <object_num>

<object_type> ::= DATASET | GROUP | DATATYPE

<object_id> ::= OBJECTID { <object_num> }

<object_num> ::= <int_value>:<int_value> | <int_value>

<attribute_data> ::= ATTRIBUTE <attr_name>

<data_region_data> ::= DATASET <dataset_name> {
                           <data_region_type>opt <data_region_data_list>
                           <dataset_type>opt <dataset_space>opt
                           <data>opt
                       }

<data_region_type> ::= REGION_TYPE <data_region_data_type>

<data_region_data_type> ::= POINT | BLOCK

<data_region_data_list> ::= <data_region_data_info>, <data_region_data_list> |
                            <data_region_data_info>

<data_region_data_info> ::= <region_info> | <point_info>

<region_info> ::= (<lower_region_vals>)-(<upper_region_vals>)

<lower_region_vals> ::= <lower_bound>, <lower_region_vals> | <lower_bound>

<upper_region_vals> ::= <upper_bound>, <upper_region_vals> | <upper_bound>

<lower_bound> ::= <int_value>

<upper_bound> ::= <int_value>

<point_info> ::= (<point_vals>)

<point_vals> ::= <int_value> | <int_value>, <point_vals>

<compound_element> ::= { <any_data_seq> }

<atomic_simple_data> ::= <atomic_element>, <atomic_simple_data> |
                         <atomic_element>

<simple_space_data> ::= <any_data_seq>

<variable_length_element> ::= ( <any_data_seq> )

<array_element> ::= '[' <any_data_seq> ']'

<named_datatype> ::= DATATYPE <type_name> { <datatype> }

<type_name> ::= <identifier>

<hardlink> ::= HARDLINK <path_name>

<group> ::= GROUP <group_name> { <hardlink> | <group_info> }

<group_comment> ::= COMMENT <string_data>

<group_name> ::= <identifier>

<group_info> ::= <object_id>opt <group_comment>opt <group_attribute>*
                 <group_member>*

<group_attribute> ::= <attribute>

<group_member> ::= <named_datatype> | <group> | <dataset> |
                   <softlink> | <external_link> | <user_defined_link>

<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }

<dataset_info> ::= <dataset_type>
                   <dataset_space>
                   <dcpl_info>opt
                   <dataset_attribute>* <object_id>opt
                   <data>opt
// Tokens above can be in any order as long as <data> is
// after <dataset_type> and <dataset_space>.

<dcpl_info> ::= <storagelayout>
                <compression_filters>
                <fillvalue>
                <allocationtime>

<dataset_name> ::= <identifier>

<storagelayout> ::= STORAGE_LAYOUT {
                        <contiguous_layout> | <chunked_layout> |
                        <compact_layout> | <virtual_layout>
                    }

<contiguous_layout> ::= CONTIGUOUS
                        <internal_layout> | <external_layout>

<chunked_layout> ::= CHUNKED <dims>
                     <filter_ratio>opt

<compact_layout> ::= COMPACT
                     <size>

<internal_layout> ::= <size>
                      <offset>

<external_layout> ::= EXTERNAL {
                          <external_file>+
                      }

<virtual_layout> ::= <vmaps>*

<vmaps> ::= MAPPING <int_value> {
                <virtual_map>
                <source_map>
            }

<virtual_map> ::= VIRTUAL {
                      <vmaps_selection>
                  }

<source_map> ::= SOURCE {
                     FILE <file_name>
                     DATASET <dataset_name>
                     <vmaps_selection>
                 }

<vmaps_selection> ::= <regular_hyperslab> | <irregular_hyperslab> |
                      <select_points> | <select_none> | <select_all>

<regular_hyperslab> ::= SELECTION REGULAR_HYPERSLAB {
                          <start>
                          <stride>
                          <count>
                          <block>
                      }

<irregular_hyperslab> ::= SELECTION IRREGULAR_HYPERSLAB {
                              <region_info>+
                          }

<select_points> ::= SELECTION POINT {
                        (<coor_list>)+
                    }

<select_none> ::= SELECTION NONE

<select_all> ::= SELECTION ALL

<dims> ::= (<dims_values>)

<dims_values> ::= <int_value> | <int_value>, <dims_values>

<external_file> ::= FILENAME <file_name> <size> <offset>

<offset> ::= OFFSET <int_value>

<size> ::= SIZE <int_value>

<filter_ratio> ::= <size> | <compressionratio>

<compressionratio> ::= <size> (<float_data>:1 COMPRESSION)

<compression_filters> ::= FILTERS {
                              <filter_type>+ | NONE
                          }

<filter_type> ::= <filter_deflate>  | <filter_shuffle>     |
                  <filter_fletcher> | <filter_szip>        |
                  <filter_nbit>     | <filter_scaleoffset> |
                  <filter_default>

<filter_default> ::= <filter_user> {
                         FILTER_ID <int_value>
                         <filter_comment>opt
                         <filter_params>opt
                     }

<filter_user> ::= USER_DEFINED_FILTER

<filter_deflate> ::= COMPRESSION DEFLATE { LEVEL <int_value> }

<filter_shuffle> ::= PREPROCESSING SHUFFLE

<filter_fletcher> ::= CHECKSUM FLETCHER32

<filter_szip> ::= COMPRESSION SZIP {
                      PIXELS_PER_BLOCK <int_value>
                      <filter_szip_mode>opt
                      <filter_szip_coding>opt
                      <filter_szip_order>opt
                      <filter_szip_header>opt
                  }

<filter_szip_mode> ::= MODE HARDWARE | K13

<filter_szip_coding> ::= CODING ENTROPY | NEAREST NEIGHBOUR

<filter_szip_order> ::= BYTE_ORDER LSB | MSB

<filter_szip_header> ::= HEADER RAW

<filter_nbit> ::= COMPRESSION NBIT

<filter_scaleoffset> ::= COMPRESSION SCALEOFFSET { MIN BITS <int_value> }

<filter_comment> ::= COMMENT  <identifier>

<filter_params> ::= PARAMS { <int_value>* }

<fillvalue> ::= FILLVALUE {
                    FILL_TIME H5D_FILL_TIME_ALLOC | H5D_FILL_TIME_NEVER | H5D_FILL_TIME_IFSET
                    VALUE H5D_FILL_VALUE_UNDEFINED | H5D_FILL_VALUE_DEFAULT | <any_element>
                }

<allocationtime> ::= ALLOCATION_TIME {
                         H5D_ALLOC_TIME_EARLY | H5D_ALLOC_TIME_INCR |
                         H5D_ALLOC_TIME_LATE
                     }

<dataset_attribute> ::= <attribute>

<softlink> ::= SOFTLINK <softlink_name> {
                   LINKTARGET <target>
               }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<external_link> ::= EXTERNAL_LINK <external_link_name> {
                        TARGETFILE <targetfile>
                        TARGETPATH <targetpath> <targetobj>opt
                    }

<external_link_name> ::= <identifier>

<user_defined_link> ::= USERDEFINED_LINK <external_link_name> {
                            LINKCLASS <user_link_type>
                        }

<user_link_type> ::= <int_value>

<targetfile> ::= <file_name>

<targetpath> ::= <identifier>

<targetobj> ::= <named_datatype> | <group> | <dataset>

<identifier> ::= "a string"
// character '/' should be used with care.

<pos_list> ::= <pos_int>, <pos_list> | <pos_int>

<int_value> ::= 0 | <pos_int>

<pos_int> ::= [1-9][0-9]*

<hex_value> ::= 0x[0-F][0-F]+ | [0-F][0-F]+

4. An Example of an HDF5 File in DDL

HDF5 "example.h5" {
GROUP "/" {
   ATTRIBUTE "attr1" {
      DATATYPE H5T_STRING {
           STRSIZE 17;
           STRPAD H5T_STR_NULLTERM;
           CSET H5T_CSET_ASCII;
           CTYPE H5T_C_S1;
         }
      DATASPACE SCALAR
      DATA {
         "string attribute"
      }
   }
   DATASET "dset1" {
      DATATYPE H5T_STD_I32BE
      DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }
      DATA {
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9
      }
   }
   DATASET "dset2" {
      DATATYPE H5T_COMPOUND {
         H5T_STD_I32BE "a";
         H5T_IEEE_F32BE "b";
         H5T_IEEE_F64BE "c";
      }
      DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
      DATA {
         {
            1,
            0.1,
            0.01
         },
         {
            2,
            0.2,
            0.02
         },
         {
            3,
            0.3,
            0.03
         },
         {
            4,
            0.4,
            0.04
         },
         {
            5,
            0.5,
            0.05
         }
      }
   }
   GROUP "group1" {
      COMMENT "This is a comment for group1"
      DATASET "dset3" {
         DATATYPE "/type1"
         DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
         DATA {
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            }
         }
      }
   }
   DATASET "dset3" {
      DATATYPE H5T_VLEN { H5T_STD_I32LE }
      DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
      DATA {
         (0), (10, 11), (20, 21, 22), (30, 31, 32, 33)
      }
   }
   GROUP "group2" {
      HARDLINK "/group1"
   }
   SOFTLINK "slink1" {
      LINKTARGET "somevalue"
   }
   DATATYPE "type1" H5T_COMPOUND {
      H5T_ARRAY { [4] H5T_STD_I32BE } "a";
      H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b";
   }
}
}

HDF5 documents and links 
Introduction to HDF5 
HDF5 User’s Guide 
HDF5 Reference Manual 

Last modified: 24 September 2014