-
Notifications
You must be signed in to change notification settings - Fork 2
/
fortio.py
373 lines (323 loc) · 11.9 KB
/
fortio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
"""
# Fortio
A Python IO for Fortran Unformatted Binary Files with Variable-Length Records.
## Features
- read and write Fortran unformatted files
- auto-detect endianness (byte order)
- allow reading data into pre-allocated buffers
- allow skipping over records or jumping directly to a wanted record without reading data
- support subrecords (which are necessary for long records whose size is larger than
about 2 GB, the limit of a signed 4-byte integer header)
- support numpy.memmap array for fast loading
## Usage
```
from fortio import FortranFile
with FortranFile(filename) as f:
    a = f.read_record('i4')
    f.skip_record()
    b = f.read_record('f8')
```
"""
# Package metadata: author contact string and release version of this module.
__author__ = 'Syrtis Major <[email protected]>'
__version__ = '0.4'
import warnings
import numpy as np
import os
# Public API: the only name exported by a star-import of this module.
__all__ = ['FortranFile']
def _assert_header_equal(head, tail):
    """Raise ValueError unless the leading and trailing record markers match."""
    if head != tail:
        raise ValueError("inconsistent record headers: %d != %d." % (head, tail))


def _assert_header_abs_equal(head, tail):
    """Raise ValueError unless the two record markers are equal up to sign.

    Used for files with subrecords, where a marker may be negative.
    """
    if (head != tail) and (head != -tail):
        raise ValueError("inconsistent record headers: |%d| != |%d|." % (head, tail))


class FortranFile(object):
    """Fortran Unformatted Binary file with Variable-Length Records.

    Every record is laid out as ``marker | data | marker`` where both markers
    are integers of type `header_dtype` holding the byte length of the data.
    When `header_dtype` is a signed type the file is read as a sequence of
    subrecords: a negative leading marker means the record continues in the
    next subrecord, a non-negative one ends the record.

    Attributes
    ----------
    file : str
        Absolute path of the file.
    filesize : int
        Size of the file in bytes at the time it was opened.
    mode : str
        'r' or 'w'.
    header_dtype : numpy.dtype
        Data type (including byte order) of the record markers.
    long_records : bool
        True when `header_dtype` is signed, i.e. subrecords are expected.
    nrec : int
        Number of records; only set when the file was opened for reading
        with ``check_file=True``.
    """

    def __init__(self, filename, mode='r', header_dtype='uint32',
                 auto_endian=True, check_file=True):
        """
        Parameters
        ----------
        filename : str
            File to open.
        mode : str
            The mode can be 'r' or 'w' for reading (default) or writing.
        header_dtype : data-type
            Data type of the record header, default is 'uint32'.
            If signed integer type is used, the file will be assumed to contain
            subrecords (ie. long records).
        auto_endian: bool
            If True, file byteorder will be auto detected, otherwise
            byteorder of given header_dtype will be used.
        check_file: bool
            If True, header consistency of every record in the file will be checked.
            This may take a while when the file contains a large number of records.
            Once checking is finished, record jumping will be faster.

        Raises
        ------
        ValueError
            If `mode` is invalid, or the file fails the byteorder/consistency
            checks (the file is closed before raising in the latter case).
        TypeError
            If `header_dtype` is not an integer type.
        """
        filename = os.path.abspath(filename)
        if mode not in ['r', 'w']:
            raise ValueError("mode must be 'r' or 'w'")

        header_dtype = np.dtype(header_dtype)
        if header_dtype.kind == 'u':
            long_records = False
        elif header_dtype.kind == 'i':
            long_records = True
        else:
            raise TypeError('header_dtype should be integer type.')

        self.file = filename
        self.mode = mode
        self.header_dtype = header_dtype
        self.long_records = long_records
        self._fp = open(filename, '%sb' % mode)
        # Take the size from the open handle: in 'w' mode the file may not
        # exist before open(), and open() truncates it anyway.
        self.filesize = os.fstat(self._fp.fileno()).st_size

        if self.mode != 'w':
            if auto_endian:
                self._check_byteorder()
            if check_file:
                self._check_file()
            self._fp.seek(0)  # restore the pos indicator after checking

    @property
    def closed(self):
        """True if the underlying file object is closed."""
        return self._fp.closed

    @property
    def byteorder(self):
        """Byte order character of `header_dtype` (numpy convention)."""
        return self.header_dtype.byteorder

    def _read_header(self):
        '''Read one record marker at the current position.

        Returns
        -------
        head : int
            The marker as a plain Python int, so that later arithmetic
            (abs, sums of subrecord sizes) cannot overflow a fixed-width type.

        Raises
        ------
        ValueError
            If a complete marker cannot be read (end of file).
        '''
        raw = np.fromfile(self._fp, dtype=self.header_dtype, count=1)
        if raw.size != 1:
            raise ValueError("unexpected end of file while reading record header.")
        return int(raw[0])

    def _check_byteorder(self):
        '''Determinate the byteorder of header_dtype by checking the
        header consistency of the first record.

        If the first record is inconsistent with the current byteorder, the
        swapped byteorder is tried; on success a warning is issued, on
        failure the file is closed and ValueError is raised.
        '''
        try:
            # use goto_record in case the current pos indicator is not 0
            self.goto_record(1)
        except ValueError:
            self.header_dtype = self.header_dtype.newbyteorder()
            try:
                self.goto_record(1)
                msg = ("byteorder of the file is set to '%s' by auto-detection."
                       % self.header_dtype.byteorder)
                warnings.warn(msg)
            except ValueError:
                self.close()
                raise ValueError("Invalid fortran file '%s'." % self.file)

    def _check_file(self):
        '''Walk through the whole file, validating every record and building
        the index (`nrec`, `_offsets`, `_lengths`) used for fast jumping.

        The file is closed and ValueError is raised if any record is
        inconsistent or the file does not end exactly on a record boundary.
        '''
        try:
            self._fp.seek(0)
            offsets, lengths = [], []
            while True:
                offset = self._fp.tell()
                if offset == self.filesize:
                    break
                length = self.skip_record()
                offsets.append(offset)
                lengths.append(length)
            self.nrec = len(offsets)
            self._offsets = offsets
            self._lengths = lengths
        except ValueError:
            self.close()
            raise ValueError("Invalid fortran file '%s'." % self.file)

    def write_record(self, data):
        '''Write a data record to file.

        Parameters
        ----------
        data : array_like
            Converted with `numpy.asarray` and written as-is, i.e. in its
            own byte order; only the markers use `header_dtype`.

        Returns
        -------
        nbytes : int
            nbytes of written data. The size of headers is not included.

        Raises
        ------
        IOError
            If the file is not open for writing.
        NotImplementedError
            If `header_dtype` is signed (subrecords).
        ValueError
            If the data size does not fit into `header_dtype`.
        '''
        if self.mode != 'w':
            raise IOError('File not open for writing.')
        if self.long_records:
            raise NotImplementedError('not support writing with signed header yet.')

        data = np.asarray(data)
        # Validate before building the marker so an oversized length is
        # never cast into the header type.
        if data.nbytes > np.iinfo(self.header_dtype).max:
            raise ValueError('input data is too long for header_dtype: %s.'
                             % self.header_dtype.name)
        head = np.array(data.nbytes).astype(self.header_dtype)

        head.tofile(self._fp)
        data.tofile(self._fp)
        head.tofile(self._fp)
        return data.nbytes

    def skip_record(self, nrec=1):
        '''Skip over the next `nrec` records.

        Parameters
        ----------
        nrec : int

        Returns
        -------
        total : int
            nbytes of skipped data.
            Note the size of headers is not included.

        Raises
        ------
        ValueError
            If the markers of a record are inconsistent or the end of file
            is reached.
        '''
        total = 0
        if self.long_records:
            for _ in range(nrec):
                # A record is a chain of subrecords; a non-negative leading
                # marker identifies the last one.
                while True:
                    head = self._read_header()
                    self._fp.seek(abs(head), 1)
                    tail = self._read_header()
                    _assert_header_abs_equal(head, tail)
                    total += abs(head)
                    if head >= 0:
                        break
        else:
            for _ in range(nrec):
                head = self._read_header()
                self._fp.seek(head, 1)
                tail = self._read_header()
                _assert_header_equal(head, tail)
                total += head
        return total

    def _read_record_data(self, data):
        '''Read the record at the current position into `data`.

        `data` should be array with type `byte` that is large enough to hold
        the record. Returns the number of bytes read and raises ValueError
        on inconsistent markers or when fewer bytes than announced by the
        marker could be stored.
        '''
        total = 0
        if self.long_records:
            while True:
                head = self._read_header()
                nread = self._fp.readinto(data[total:total + abs(head)])
                if nread != abs(head):
                    raise ValueError("record data is truncated or does not fit the buffer.")
                tail = self._read_header()
                _assert_header_abs_equal(head, tail)
                total += nread
                if head >= 0:
                    break
        else:
            head = self._read_header()
            nread = self._fp.readinto(data[:head])
            if nread != head:
                raise ValueError("record data is truncated or does not fit the buffer.")
            tail = self._read_header()
            _assert_header_equal(head, tail)
            total += nread
        return total

    def goto_record(self, rec=None):
        '''Skip the first `rec` records from the beginning of the file.

        Parameters
        ----------
        rec : int or None
            The wanted record. 0 is the first record,
            Do nothing if `rec` is None.
            `rec` equal to the number of records positions at the end of
            the file.
        '''
        if rec is None:
            return
        if hasattr(self, '_offsets'):
            if rec == len(self._offsets):
                # One past the last record: behave like the un-indexed path,
                # which ends up at the end of the file.
                self._fp.seek(self.filesize)
            else:
                self._fp.seek(self._offsets[rec])
        else:
            self._fp.seek(0)
            self.skip_record(rec)
        return

    def get_record_size(self, rec=None):
        '''Get the data size of the record.

        The position indicator of the file is left unchanged.

        Parameters
        ----------
        rec : int or None
            The wanted record. 0 is the first record,
            `None` means the current record.

        Returns
        -------
        size : int
            nbytes of the record data.
            Note the size of headers is not included.
        '''
        if (rec is not None) and hasattr(self, '_lengths'):
            return self._lengths[rec]

        pos = self._fp.tell()
        try:
            self.goto_record(rec)
            if self.long_records:
                # subrecord sizes have to be summed up
                size = self.skip_record()
            else:
                size = self._read_header()
        finally:
            # restore the position even if the record turned out invalid
            self._fp.seek(pos)
        return size

    def mmap_record(self, dtype='byte', shape=None, rec=None):
        '''Read a record with given dtype from the file, using memmap
        when possible.

        Parameters
        ----------
        dtype : data type
            Data type. The endianess of record header will be used.
        shape : int or tuple
            The wanted shape of the record array.
        rec : int or None
            The record to read. 0 is the first record,
            `None` means the current record.

        Returns
        -------
        result : memmap array
            Data stored in the record.
        '''
        return self.read_record(dtype=dtype, shape=shape, rec=rec, memmap=True)

    def read_record(self, dtype='byte', shape=None, rec=None, memmap=False):
        '''Read a record with given dtype from the file.

        Parameters
        ----------
        dtype : data type
            Data type. The endianess of record header will be used.
        shape : int or tuple
            The wanted shape of the record array.
        rec : int or None
            The record to read. 0 is the first record,
            `None` means the current record.
        memmap : bool
            If true, a memmap of the record will be created and
            returned when possible.

        Returns
        -------
        result : ndarr
            Data stored in the record.

        Raises
        ------
        ValueError
            If memmap is requested for a file with subrecords, the record
            size is not a multiple of the item size, or does not match
            `shape`.
        '''
        dtype = np.dtype(dtype).newbyteorder(self.byteorder)
        if self.long_records and memmap:
            raise ValueError('memmap does not support subrecords.')

        self.goto_record(rec)
        size = self.get_record_size()
        if size % dtype.itemsize:
            raise ValueError("record size is not multiple of itemsize.")
        if (shape is not None) and (size != dtype.itemsize * np.prod(shape)):
            raise ValueError("record size does not match the wanted shape.")

        if memmap:
            # map only the payload, which starts right after the leading marker
            data = np.memmap(self.file, dtype='byte', shape=size, mode='r',
                             offset=self._fp.tell() + self.header_dtype.itemsize)
            self.skip_record()
        else:
            data = np.empty(size, dtype='byte')
            self._read_record_data(data)

        data = data.view(dtype)
        if shape is not None:
            data = data.reshape(shape)
        return data

    def read_record_into(self, into, offset=None, rec=None):
        '''Read a record from the file into given array.

        Parameters
        ----------
        into : ndarray
            The array to store the record data. It must be C-contiguous so
            that the data can be stored in place.
        offset : int
            The offset *bytes*, ie. data is read into `into.view('byte')[offset:]`.
        rec : int or None
            The record to read. 0 is the first record,
            `None` means the current record.

        Returns
        -------
        nread : int
            nbytes of read data.

        Raises
        ------
        TypeError
            If the byte order of `into` differs from that of the file.
        ValueError
            If `into` is not C-contiguous or too small for the record.

        Notes
        -----
        No byte swapping is performed. Arrays whose dtype has no byte order
        (single-byte or structured types, reported as '|' by numpy) are
        accepted without an endianess check.
        '''
        if into.dtype.byteorder not in (self.byteorder, '|'):
            raise TypeError('endianess of the input array does not match the file.')
        if not into.flags.c_contiguous:
            # reshape(-1) would return a copy and the data would be lost
            raise ValueError("the input array must be C-contiguous.")

        data = into.reshape(-1).view('byte')
        if offset is not None:
            data = data[offset:]

        self.goto_record(rec)
        size = self.get_record_size()
        if size > data.nbytes:
            raise ValueError("record size is larger than given array.")

        nread = self._read_record_data(data)
        return nread

    def close(self):
        '''Close file.'''
        self._fp.close()

    def flush(self):
        '''Flush the buffer.'''
        self._fp.flush()

    def __enter__(self):
        return self

    def __exit__(self, type, value, trace):
        self.close()

    def __repr__(self):
        return "<FortranFile '{}', mode '{}', header_dtype '{}' at {}>".format(
            self.file, self.mode, self.header_dtype.str, hex(id(self)))