DOLFINx  0.5.1
DOLFINx C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <chrono>
12 #include <cstdint>
13 #include <dolfinx/common/log.h>
14 #include <filesystem>
15 #include <hdf5.h>
16 #include <mpi.h>
17 #include <numeric>
18 #include <string>
19 #include <vector>
20 
21 namespace dolfinx::io
22 {
23 
25 /// This class provides an interface to some HDF5 functionality
26 class HDF5Interface
27 {
28 #define HDF5_FAIL -1
29 public:
35  static hid_t open_file(MPI_Comm comm, const std::filesystem::path& filename,
36  const std::string& mode, const bool use_mpi_io);
37 
40  static void close_file(const hid_t handle);
41 
44  static void flush_file(const hid_t handle);
45 
49  static std::filesystem::path get_filename(hid_t handle);
50 
61  template <typename T>
62  static void write_dataset(const hid_t handle, const std::string& dataset_path,
63  const T* data,
64  const std::array<std::int64_t, 2>& range,
65  const std::vector<std::int64_t>& global_size,
66  bool use_mpi_io, bool use_chunking);
67 
76  template <typename T>
77  static std::vector<T> read_dataset(const hid_t handle,
78  const std::string& dataset_path,
79  const std::array<std::int64_t, 2>& range);
80 
85  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
86 
91  static std::vector<std::int64_t>
92  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
93 
100  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
101 
106  static bool get_mpi_atomicity(const hid_t handle);
107 
108 private:
112  static void add_group(const hid_t handle, const std::string& dataset_path);
113 
114  // Return HDF5 data type
115  template <typename T>
116  static hid_t hdf5_type()
117  {
118  throw std::runtime_error("Cannot get HDF5 primitive data type. "
119  "No specialised function for this data type");
120  }
121 };
123 //---------------------------------------------------------------------------
124 template <>
125 inline hid_t HDF5Interface::hdf5_type<float>()
126 {
127  return H5T_NATIVE_FLOAT;
128 }
129 //---------------------------------------------------------------------------
130 template <>
131 inline hid_t HDF5Interface::hdf5_type<double>()
132 {
133  return H5T_NATIVE_DOUBLE;
134 }
135 //---------------------------------------------------------------------------
136 template <>
137 inline hid_t HDF5Interface::hdf5_type<int>()
138 {
139  return H5T_NATIVE_INT;
140 }
141 //---------------------------------------------------------------------------
142 template <>
143 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
144 {
145  return H5T_NATIVE_INT64;
146 }
147 //---------------------------------------------------------------------------
148 template <>
149 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
150 {
151  if (sizeof(std::size_t) == sizeof(unsigned long))
152  return H5T_NATIVE_ULONG;
153  else if (sizeof(std::size_t) == sizeof(unsigned int))
154  return H5T_NATIVE_UINT;
155 
156  throw std::runtime_error("Cannot determine size of std::size_t. "
157  "std::size_t is not the same size as long or int");
158 }
159 //---------------------------------------------------------------------------
160 template <typename T>
161 inline void HDF5Interface::write_dataset(
162  const hid_t file_handle, const std::string& dataset_path, const T* data,
163  const std::array<std::int64_t, 2>& range,
164  const std::vector<std::int64_t>& global_size, bool use_mpi_io, bool use_chunking)
165 {
166  // Data rank
167  const int rank = global_size.size();
168  assert(rank != 0);
169  if (rank > 2)
170  {
171  throw std::runtime_error("Cannot write dataset to HDF5 file. "
172  "Only rank 1 and rank 2 datasets are supported");
173  }
174 
175  // Get HDF5 data type
176  const hid_t h5type = hdf5_type<T>();
177 
178  // Hyperslab selection parameters
179  std::vector<hsize_t> count(global_size.begin(), global_size.end());
180  count[0] = range[1] - range[0];
181 
182  // Data offsets
183  std::vector<hsize_t> offset(rank, 0);
184  offset[0] = range[0];
185 
186  // Dataset dimensions
187  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
188 
189  // Create a global data space
190  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
191  if (filespace0 == HDF5_FAIL)
192  throw std::runtime_error("Failed to create HDF5 data space");
193 
194  // Set chunking parameters
195  hid_t chunking_properties;
196  if (use_chunking)
197  {
198  // Set chunk size and limit to 1kB min/1MB max
199  hsize_t chunk_size = dimsf[0] / 2;
200  if (chunk_size > 1048576)
201  chunk_size = 1048576;
202  if (chunk_size < 1024)
203  chunk_size = 1024;
204 
205  hsize_t chunk_dims[2] = {chunk_size, rank > 1 ? dimsf[1] : 1};
206  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
207  H5Pset_chunk(chunking_properties, rank, chunk_dims);
208  }
209  else
210  chunking_properties = H5P_DEFAULT;
211 
212  // Check that group exists and recursively create if required
213  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
214  add_group(file_handle, group_name);
215 
216  // Create global dataset (using dataset_path)
217  const hid_t dset_id
218  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
219  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
220  if (dset_id == HDF5_FAIL)
221  throw std::runtime_error("Failed to create HDF5 global dataset.");
222 
223  // Generic status report
224  herr_t status;
225 
226  // Close global data space
227  status = H5Sclose(filespace0);
228  if (status == HDF5_FAIL)
229  throw std::runtime_error("Failed to close HDF5 global data space.");
230 
231  // Create a local data space
232  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
233  if (memspace == HDF5_FAIL)
234  throw std::runtime_error("Failed to create HDF5 local data space.");
235 
236  // Create a file dataspace within the global space - a hyperslab
237  const hid_t filespace1 = H5Dget_space(dset_id);
238  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
239  nullptr, count.data(), nullptr);
240  if (status == HDF5_FAIL)
241  throw std::runtime_error("Failed to create HDF5 dataspace.");
242 
243  // Set parallel access
244  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
245  if (use_mpi_io)
246  {
247 #ifdef H5_HAVE_PARALLEL
248  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
249  if (status == HDF5_FAIL)
250  {
251  throw std::runtime_error(
252  "Failed to set HDF5 data transfer property list.");
253  }
254 
255 #else
256  throw std::runtime_error("HDF5 library has not been configured with MPI");
257 #endif
258  }
259 
260  // Write local dataset into selected hyperslab
261  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
262  if (status == HDF5_FAIL)
263  {
264  throw std::runtime_error(
265  "Failed to write HDF5 local dataset into hyperslab.");
266  }
267 
268  if (use_chunking)
269  {
270  // Close chunking properties
271  status = H5Pclose(chunking_properties);
272  if (status == HDF5_FAIL)
273  throw std::runtime_error("Failed to close HDF5 chunking properties.");
274  }
275 
276  // Close dataset collectively
277  status = H5Dclose(dset_id);
278  if (status == HDF5_FAIL)
279  throw std::runtime_error("Failed to close HDF5 dataset.");
280 
281  // Close hyperslab
282  status = H5Sclose(filespace1);
283  if (status == HDF5_FAIL)
284  throw std::runtime_error("Failed to close HDF5 hyperslab.");
285 
286  // Close local dataset
287  status = H5Sclose(memspace);
288  if (status == HDF5_FAIL)
289  throw std::runtime_error("Failed to close local HDF5 dataset.");
290 
291  // Release file-access template
292  status = H5Pclose(plist_id);
293  if (status == HDF5_FAIL)
294  throw std::runtime_error("Failed to release HDF5 file-access template.");
295 }
296 //---------------------------------------------------------------------------
297 template <typename T>
298 inline std::vector<T>
299 HDF5Interface::read_dataset(const hid_t file_handle,
300  const std::string& dataset_path,
301  const std::array<std::int64_t, 2>& range)
302 {
303  auto timer_start = std::chrono::system_clock::now();
304 
305  // Open the dataset
306  const hid_t dset_id
307  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
308  if (dset_id == HDF5_FAIL)
309  throw std::runtime_error("Failed to open HDF5 global dataset.");
310 
311  // Open dataspace
312  const hid_t dataspace = H5Dget_space(dset_id);
313  if (dataspace == HDF5_FAIL)
314  throw std::runtime_error("Failed to open HDF5 data space.");
315 
316  // Get rank of data set
317  const int rank = H5Sget_simple_extent_ndims(dataspace);
318  assert(rank >= 0);
319  if (rank > 2)
320  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
321 
322  // Allocate data for shape
323  std::vector<hsize_t> shape(rank);
324 
325  // Get size in each dimension
326  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
327  if (ndims != rank)
328  throw std::runtime_error("Failed to get dimensionality of dataspace");
329 
330  // Hyperslab selection
331  std::vector<hsize_t> offset(rank, 0);
332  std::vector<hsize_t> count = shape;
333  if (range[0] != -1 and range[1] != -1)
334  {
335  offset[0] = range[0];
336  count[0] = range[1] - range[0];
337  }
338  else
339  offset[0] = 0;
340 
341  // Select a block in the dataset beginning at offset[], with
342  // size=count[]
343  [[maybe_unused]] herr_t status = H5Sselect_hyperslab(
344  dataspace, H5S_SELECT_SET, offset.data(), nullptr, count.data(), nullptr);
345  if (status == HDF5_FAIL)
346  throw std::runtime_error("Failed to select HDF5 hyperslab.");
347 
348  // Create a memory dataspace
349  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
350  if (memspace == HDF5_FAIL)
351  throw std::runtime_error("Failed to create HDF5 dataspace.");
352 
353  // Create local data to read into
354  std::vector<T> data(
355  std::reduce(count.begin(), count.end(), 1, std::multiplies{}));
356 
357  // Read data on each process
358  const hid_t h5type = hdf5_type<T>();
359  status
360  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
361  if (status == HDF5_FAIL)
362  throw std::runtime_error("Failed to read HDF5 data.");
363 
364  // Close dataspace
365  status = H5Sclose(dataspace);
366  if (status == HDF5_FAIL)
367  throw std::runtime_error("Failed to close HDF5 dataspace.");
368 
369  // Close memspace
370  status = H5Sclose(memspace);
371  if (status == HDF5_FAIL)
372  throw std::runtime_error("Failed to close HDF5 memory space.");
373 
374  // Close dataset
375  status = H5Dclose(dset_id);
376  if (status == HDF5_FAIL)
377  throw std::runtime_error("Failed to close HDF5 dataset.");
378 
379  auto timer_end = std::chrono::system_clock::now();
380  std::chrono::duration<double> dt = (timer_end - timer_start);
381  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
382 
383  LOG(INFO) << "HDF5 Read data rate: " << data_rate << " MB/s";
384 
385  return data;
386 }
387 //---------------------------------------------------------------------------
389 } // namespace dolfinx::io
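The primary template hdf5_type<T>() deliberately throws, so only the explicit specialisations above (float, double, int, std::int64_t and std::size_t) can be used; any other scalar type fails at runtime with a clear error. If a further mapping were added alongside the existing ones inside the header, it would follow the same pattern. The specialisation below is a hypothetical illustration only and is not part of the DOLFINx source:

template <>
inline hid_t HDF5Interface::hdf5_type<unsigned int>()
{
  // Hypothetical mapping (illustration only): 'unsigned int' to the
  // corresponding HDF5 native type
  return H5T_NATIVE_UINT;
}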
dolfinx::io::HDF5Interface
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:27
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:124
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:153
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:136
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:203
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:130
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:244
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:236
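Taken together, these members support a simple parallel round trip: each rank supplies its own [begin, end) block through range, while global_size describes the full dataset. The sketch below is a usage illustration only; the include path, file name, dataset path and the helper function write_and_read are assumptions, not part of DOLFINx.

#include <array>
#include <cstdint>
#include <vector>
#include <mpi.h>
#include <dolfinx/io/HDF5Interface.h> // assumed include path

// Hypothetical helper: every rank writes 10 doubles, then reads them back
void write_and_read(MPI_Comm comm)
{
  using dolfinx::io::HDF5Interface;

  int rank = 0, size = 1;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);

  // Create the file collectively with MPI-IO enabled
  hid_t h = HDF5Interface::open_file(comm, "example.h5", "w", true);

  // Rank-local block [10*rank, 10*(rank + 1)) of a rank-1 dataset with
  // global size 10*size
  std::vector<double> x(10, static_cast<double>(rank));
  const std::array<std::int64_t, 2> range = {10 * rank, 10 * (rank + 1)};
  const std::vector<std::int64_t> global_size = {10 * size};

  HDF5Interface::write_dataset(h, "/data/x", x.data(), range, global_size,
                               true, false);

  // Read the same block back on each rank
  std::vector<double> y
      = HDF5Interface::read_dataset<double>(h, "/data/x", range);

  HDF5Interface::close_file(h);
}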