Note: this is documentation for an old release. View the latest documentation at docs.fenicsproject.org/v0.3.0/v0.9.0/cpp
DOLFINx  0.3.0
DOLFINx C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <chrono>
12 #include <cstdint>
13 #include <dolfinx/common/log.h>
14 #include <hdf5.h>
15 #include <mpi.h>
16 #include <string>
17 #include <vector>
18 
19 namespace dolfinx::io
20 {
21 
23 
25 {
26 #define HDF5_FAIL -1
27 public:
33  static hid_t open_file(MPI_Comm comm, const std::string& filename,
34  const std::string& mode, const bool use_mpi_io);
35 
38  static void close_file(const hid_t handle);
39 
42  static void flush_file(const hid_t handle);
43 
47  static std::string get_filename(hid_t handle);
48 
59  template <typename T>
60  static void write_dataset(const hid_t handle, const std::string& dataset_path,
61  const T* data,
62  const std::array<std::int64_t, 2>& range,
63  const std::vector<std::int64_t>& global_size,
64  bool use_mpi_io, bool use_chunking);
65 
74  template <typename T>
75  static std::vector<T> read_dataset(const hid_t handle,
76  const std::string& dataset_path,
77  const std::array<std::int64_t, 2>& range);
78 
83  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
84 
89  static std::vector<std::int64_t>
90  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
91 
98  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
99 
104  static bool get_mpi_atomicity(const hid_t handle);
105 
106 private:
110  static void add_group(const hid_t handle, const std::string& dataset_path);
111 
112  // Return HDF5 data type
113  template <typename T>
114  static hid_t hdf5_type()
115  {
116  throw std::runtime_error("Cannot get HDF5 primitive data type. "
117  "No specialised function for this data type");
118  }
119 };
121 //---------------------------------------------------------------------------
122 template <>
123 inline hid_t HDF5Interface::hdf5_type<float>()
124 {
125  return H5T_NATIVE_FLOAT;
126 }
127 //---------------------------------------------------------------------------
128 template <>
129 inline hid_t HDF5Interface::hdf5_type<double>()
130 {
131  return H5T_NATIVE_DOUBLE;
132 }
133 //---------------------------------------------------------------------------
134 template <>
135 inline hid_t HDF5Interface::hdf5_type<int>()
136 {
137  return H5T_NATIVE_INT;
138 }
139 //---------------------------------------------------------------------------
140 template <>
141 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
142 {
143  return H5T_NATIVE_INT64;
144 }
145 //---------------------------------------------------------------------------
146 template <>
147 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
148 {
149  if (sizeof(std::size_t) == sizeof(unsigned long))
150  return H5T_NATIVE_ULONG;
151  else if (sizeof(std::size_t) == sizeof(unsigned int))
152  return H5T_NATIVE_UINT;
153 
154  throw std::runtime_error("Cannot determine size of std::size_t. "
155  "std::size_t is not the same size as long or int");
156 }
157 //---------------------------------------------------------------------------
158 template <typename T>
159 inline void HDF5Interface::write_dataset(
160  const hid_t file_handle, const std::string& dataset_path, const T* data,
161  const std::array<std::int64_t, 2>& range,
162  const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
163 {
164  // Data rank
165  const std::size_t rank = global_size.size();
166  assert(rank != 0);
167 
168  if (rank > 2)
169  {
170  throw std::runtime_error("Cannot write dataset to HDF5 file"
171  "Only rank 1 and rank 2 dataset are supported");
172  }
173 
174  // Get HDF5 data type
175  const hid_t h5type = hdf5_type<T>();
176 
177  // Hyperslab selection parameters
178  std::vector<hsize_t> count(global_size.begin(), global_size.end());
179  count[0] = range[1] - range[0];
180 
181  // Data offsets
182  std::vector<hsize_t> offset(rank, 0);
183  offset[0] = range[0];
184 
185  // Dataset dimensions
186  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
187 
188  // Generic status report
189  herr_t status;
190 
191  // Create a global data space
192  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
193  assert(filespace0 != HDF5_FAIL);
194 
195  // Set chunking parameters
196  hid_t chunking_properties;
197  if (use_chunking)
198  {
199  // Set chunk size and limit to 1kB min/1MB max
200  hsize_t chunk_size = dimsf[0] / 2;
201  if (chunk_size > 1048576)
202  chunk_size = 1048576;
203  if (chunk_size < 1024)
204  chunk_size = 1024;
205 
206  hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
207  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
208  H5Pset_chunk(chunking_properties, rank, chunk_dims);
209  }
210  else
211  chunking_properties = H5P_DEFAULT;
212 
213  // Check that group exists and recursively create if required
214  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
215  add_group(file_handle, group_name);
216 
217  // Create global dataset (using dataset_path)
218  const hid_t dset_id
219  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
220  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
221  assert(dset_id != HDF5_FAIL);
222 
223  // Close global data space
224  status = H5Sclose(filespace0);
225  assert(status != HDF5_FAIL);
226 
227  // Create a local data space
228  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
229  assert(memspace != HDF5_FAIL);
230 
231  // Create a file dataspace within the global space - a hyperslab
232  const hid_t filespace1 = H5Dget_space(dset_id);
233  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
234  nullptr, count.data(), nullptr);
235  assert(status != HDF5_FAIL);
236 
237  // Set parallel access
238  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
239  if (use_mpi_io)
240  {
241 #ifdef H5_HAVE_PARALLEL
242  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
243  assert(status != HDF5_FAIL);
244 #else
245  throw std::runtime_error("HDF5 library has not been configured with MPI");
246 #endif
247  }
248 
249  // Write local dataset into selected hyperslab
250  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
251  assert(status != HDF5_FAIL);
252 
253  if (use_chunking)
254  {
255  // Close chunking properties
256  status = H5Pclose(chunking_properties);
257  assert(status != HDF5_FAIL);
258  }
259 
260  // Close dataset collectively
261  status = H5Dclose(dset_id);
262  assert(status != HDF5_FAIL);
263 
264  // Close hyperslab
265  status = H5Sclose(filespace1);
266  assert(status != HDF5_FAIL);
267 
268  // Close local dataset
269  status = H5Sclose(memspace);
270  assert(status != HDF5_FAIL);
271 
272  // Release file-access template
273  status = H5Pclose(plist_id);
274  assert(status != HDF5_FAIL);
275 }
276 //---------------------------------------------------------------------------
277 template <typename T>
278 inline std::vector<T>
279 HDF5Interface::read_dataset(const hid_t file_handle,
280  const std::string& dataset_path,
281  const std::array<std::int64_t, 2>& range)
282 {
283  auto timer_start = std::chrono::system_clock::now();
284 
285  // Open the dataset
286  const hid_t dset_id
287  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
288  assert(dset_id != HDF5_FAIL);
289 
290  // Open dataspace
291  const hid_t dataspace = H5Dget_space(dset_id);
292  assert(dataspace != HDF5_FAIL);
293 
294  // Get rank of data set
295  const int rank = H5Sget_simple_extent_ndims(dataspace);
296  assert(rank >= 0);
297 
298  if (rank > 2)
299  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
300 
301  // Allocate data for shape
302  std::vector<hsize_t> shape(rank);
303 
304  // Get size in each dimension
305  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
306  assert(ndims == rank);
307 
308  // Hyperslab selection
309  std::vector<hsize_t> offset(rank, 0);
310  std::vector<hsize_t> count = shape;
311  if (range[0] != -1 and range[1] != -1)
312  {
313  offset[0] = range[0];
314  count[0] = range[1] - range[0];
315  }
316  else
317  offset[0] = 0;
318 
319  // Select a block in the dataset beginning at offset[], with
320  // size=count[]
321  herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset.data(),
322  nullptr, count.data(), nullptr);
323  assert(status != HDF5_FAIL);
324 
325  // Create a memory dataspace
326  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
327  assert(memspace != HDF5_FAIL);
328 
329  // Create local data to read into
330  std::size_t data_size = 1;
331  for (std::size_t i = 0; i < count.size(); ++i)
332  data_size *= count[i];
333  std::vector<T> data(data_size);
334 
335  // Read data on each process
336  const hid_t h5type = hdf5_type<T>();
337  status
338  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
339  assert(status != HDF5_FAIL);
340 
341  // Close dataspace
342  status = H5Sclose(dataspace);
343  assert(status != HDF5_FAIL);
344 
345  // Close memspace
346  status = H5Sclose(memspace);
347  assert(status != HDF5_FAIL);
348 
349  // Close dataset
350  status = H5Dclose(dset_id);
351  assert(status != HDF5_FAIL);
352 
353  auto timer_end = std::chrono::system_clock::now();
354  std::chrono::duration<double> dt = (timer_end - timer_start);
355  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
356 
357  LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";
358 
359  return data;
360 }
361 //---------------------------------------------------------------------------
363 } // namespace dolfinx::io
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:25
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:145
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:228
static hid_t open_file(MPI_Comm comm, const std::string &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static std::string get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:128
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:116
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:195
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:122
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:236
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:74
Support for file IO.
Definition: cells.h:22