Note: this is documentation for an old release. View the latest documentation at docs.fenicsproject.org/v0.1.0/v0.9.0/cpp
DOLFINx  0.1.0
DOLFINx C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <chrono>
12 #include <cstdint>
13 #include <dolfinx/common/log.h>
14 #include <hdf5.h>
15 #include <mpi.h>
16 #include <string>
17 #include <vector>
18 
19 namespace dolfinx
20 {
21 
22 namespace io
23 {
24 
26 
28 {
29 #define HDF5_FAIL -1
30 public:
36  static hid_t open_file(MPI_Comm mpi_comm, const std::string& filename,
37  const std::string& mode, const bool use_mpi_io);
38 
41  static void close_file(const hid_t handle);
42 
45  static void flush_file(const hid_t handle);
46 
50  static std::string get_filename(hid_t handle);
51 
62  template <typename T>
63  static void write_dataset(const hid_t handle, const std::string& dataset_path,
64  const T* data,
65  const std::array<std::int64_t, 2>& range,
66  const std::vector<std::int64_t>& global_size,
67  bool use_mpi_io, bool use_chunking);
68 
77  template <typename T>
78  static std::vector<T> read_dataset(const hid_t handle,
79  const std::string& dataset_path,
80  const std::array<std::int64_t, 2>& range);
81 
86  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
87 
92  static std::vector<std::int64_t>
93  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
94 
101  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
102 
107  static bool get_mpi_atomicity(const hid_t handle);
108 
109 private:
113  static void add_group(const hid_t handle, const std::string& dataset_path);
114 
115  // Return HDF5 data type
116  template <typename T>
117  static hid_t hdf5_type()
118  {
119  throw std::runtime_error("Cannot get HDF5 primitive data type. "
120  "No specialised function for this data type");
121  }
122 };
124 //---------------------------------------------------------------------------
125 template <>
126 inline hid_t HDF5Interface::hdf5_type<float>()
127 {
128  return H5T_NATIVE_FLOAT;
129 }
130 //---------------------------------------------------------------------------
131 template <>
132 inline hid_t HDF5Interface::hdf5_type<double>()
133 {
134  return H5T_NATIVE_DOUBLE;
135 }
136 //---------------------------------------------------------------------------
137 template <>
138 inline hid_t HDF5Interface::hdf5_type<int>()
139 {
140  return H5T_NATIVE_INT;
141 }
142 //---------------------------------------------------------------------------
143 template <>
144 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
145 {
146  return H5T_NATIVE_INT64;
147 }
148 //---------------------------------------------------------------------------
149 template <>
150 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
151 {
152  if (sizeof(std::size_t) == sizeof(unsigned long))
153  return H5T_NATIVE_ULONG;
154  else if (sizeof(std::size_t) == sizeof(unsigned int))
155  return H5T_NATIVE_UINT;
156 
157  throw std::runtime_error("Cannot determine size of std::size_t. "
158  "std::size_t is not the same size as long or int");
159 }
160 //---------------------------------------------------------------------------
161 template <typename T>
162 inline void HDF5Interface::write_dataset(
163  const hid_t file_handle, const std::string& dataset_path, const T* data,
164  const std::array<std::int64_t, 2>& range,
165  const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
166 {
167  // Data rank
168  const std::size_t rank = global_size.size();
169  assert(rank != 0);
170 
171  if (rank > 2)
172  {
173  throw std::runtime_error("Cannot write dataset to HDF5 file"
174  "Only rank 1 and rank 2 dataset are supported");
175  }
176 
177  // Get HDF5 data type
178  const hid_t h5type = hdf5_type<T>();
179 
180  // Hyperslab selection parameters
181  std::vector<hsize_t> count(global_size.begin(), global_size.end());
182  count[0] = range[1] - range[0];
183 
184  // Data offsets
185  std::vector<hsize_t> offset(rank, 0);
186  offset[0] = range[0];
187 
188  // Dataset dimensions
189  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
190 
191  // Generic status report
192  herr_t status;
193 
194  // Create a global data space
195  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
196  assert(filespace0 != HDF5_FAIL);
197 
198  // Set chunking parameters
199  hid_t chunking_properties;
200  if (use_chunking)
201  {
202  // Set chunk size and limit to 1kB min/1MB max
203  hsize_t chunk_size = dimsf[0] / 2;
204  if (chunk_size > 1048576)
205  chunk_size = 1048576;
206  if (chunk_size < 1024)
207  chunk_size = 1024;
208 
209  hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
210  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
211  H5Pset_chunk(chunking_properties, rank, chunk_dims);
212  }
213  else
214  chunking_properties = H5P_DEFAULT;
215 
216  // Check that group exists and recursively create if required
217  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
218  add_group(file_handle, group_name);
219 
220  // Create global dataset (using dataset_path)
221  const hid_t dset_id
222  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
223  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
224  assert(dset_id != HDF5_FAIL);
225 
226  // Close global data space
227  status = H5Sclose(filespace0);
228  assert(status != HDF5_FAIL);
229 
230  // Create a local data space
231  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
232  assert(memspace != HDF5_FAIL);
233 
234  // Create a file dataspace within the global space - a hyperslab
235  const hid_t filespace1 = H5Dget_space(dset_id);
236  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
237  nullptr, count.data(), nullptr);
238  assert(status != HDF5_FAIL);
239 
240  // Set parallel access
241  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
242  if (use_mpi_io)
243  {
244 #ifdef H5_HAVE_PARALLEL
245  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
246  assert(status != HDF5_FAIL);
247 #else
248  throw std::runtime_error("HDF5 library has not been configured with MPI");
249 #endif
250  }
251 
252  // Write local dataset into selected hyperslab
253  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
254  assert(status != HDF5_FAIL);
255 
256  if (use_chunking)
257  {
258  // Close chunking properties
259  status = H5Pclose(chunking_properties);
260  assert(status != HDF5_FAIL);
261  }
262 
263  // Close dataset collectively
264  status = H5Dclose(dset_id);
265  assert(status != HDF5_FAIL);
266 
267  // Close hyperslab
268  status = H5Sclose(filespace1);
269  assert(status != HDF5_FAIL);
270 
271  // Close local dataset
272  status = H5Sclose(memspace);
273  assert(status != HDF5_FAIL);
274 
275  // Release file-access template
276  status = H5Pclose(plist_id);
277  assert(status != HDF5_FAIL);
278 }
279 //---------------------------------------------------------------------------
280 template <typename T>
281 inline std::vector<T>
282 HDF5Interface::read_dataset(const hid_t file_handle,
283  const std::string& dataset_path,
284  const std::array<std::int64_t, 2>& range)
285 {
286  auto timer_start = std::chrono::system_clock::now();
287 
288  // Open the dataset
289  const hid_t dset_id
290  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
291  assert(dset_id != HDF5_FAIL);
292 
293  // Open dataspace
294  const hid_t dataspace = H5Dget_space(dset_id);
295  assert(dataspace != HDF5_FAIL);
296 
297  // Get rank of data set
298  const int rank = H5Sget_simple_extent_ndims(dataspace);
299  assert(rank >= 0);
300 
301  if (rank > 2)
302  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
303 
304  // Allocate data for shape
305  std::vector<hsize_t> shape(rank);
306 
307  // Get size in each dimension
308  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
309  assert(ndims == rank);
310 
311  // Hyperslab selection
312  std::vector<hsize_t> offset(rank, 0);
313  std::vector<hsize_t> count = shape;
314  if (range[0] != -1 and range[1] != -1)
315  {
316  offset[0] = range[0];
317  count[0] = range[1] - range[0];
318  }
319  else
320  offset[0] = 0;
321 
322  // Select a block in the dataset beginning at offset[], with
323  // size=count[]
324  herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset.data(),
325  nullptr, count.data(), nullptr);
326  assert(status != HDF5_FAIL);
327 
328  // Create a memory dataspace
329  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
330  assert(memspace != HDF5_FAIL);
331 
332  // Create local data to read into
333  std::size_t data_size = 1;
334  for (std::size_t i = 0; i < count.size(); ++i)
335  data_size *= count[i];
336  std::vector<T> data(data_size);
337 
338  // Read data on each process
339  const hid_t h5type = hdf5_type<T>();
340  status
341  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
342  assert(status != HDF5_FAIL);
343 
344  // Close dataspace
345  status = H5Sclose(dataspace);
346  assert(status != HDF5_FAIL);
347 
348  // Close memspace
349  status = H5Sclose(memspace);
350  assert(status != HDF5_FAIL);
351 
352  // Close dataset
353  status = H5Dclose(dset_id);
354  assert(status != HDF5_FAIL);
355 
356  auto timer_end = std::chrono::system_clock::now();
357  std::chrono::duration<double> dt = (timer_end - timer_start);
358  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
359 
360  LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";
361 
362  return data;
363 }
364 //---------------------------------------------------------------------------
366 } // namespace io
367 } // namespace dolfinx
dolfinx::io::HDF5Interface::close_file
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:116
dolfinx::io::HDF5Interface
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:27
dolfinx::io::HDF5Interface::open_file
static hid_t open_file(MPI_Comm mpi_comm, const std::string &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
dolfinx::io::HDF5Interface::set_mpi_atomicity
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:228
dolfinx::io::HDF5Interface::has_dataset
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:145
dolfinx::io::HDF5Interface::read_dataset
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
dolfinx::io::HDF5Interface::get_dataset_shape
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:195
dolfinx::io::HDF5Interface::flush_file
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:122
dolfinx::io::HDF5Interface::write_dataset
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
dolfinx::io::HDF5Interface::get_mpi_atomicity
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:236
dolfinx::io::HDF5Interface::get_filename
static std::string get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:128