patternpythonMinor
Reading a binary file containing periodic samples
Viewed 0 times
containingreadingfileperiodicbinarysamples
Problem
I have the following code for reading HTK feature files. The code below works correctly (I verified it with unit tests and against the output of the original HTK toolkit).
How can I speed up this code? Are there other things I should improve?
from HTK_model import FLOAT_TYPE
from numpy import array
from struct import unpack
def feature_reader(file_name):
    """Yield the feature vectors of an HTK feature file, one per sample.

    The file starts with a 12-byte big-endian header holding the number of
    samples (int32), the sample period (int32), the sample size in bytes
    (int16) and the parameter kind (int16).  When the compression bit
    (octal 2000) of the parameter kind is set, two float32 vectors A and B
    follow the header, every sample is stored as int16 values and the real
    values are ``(x + B) / A``.  Otherwise every sample is stored as
    float32 values.

    Args:
        file_name: Path of the HTK feature file to read.

    Yields:
        One numpy array of dtype ``FLOAT_TYPE`` per sample.

    Raises:
        struct.error: If the header or a sample is shorter than expected.
    """
    with open(file_name, 'rb') as in_f:
        # Decode the four header fields in a single call.  The sample period
        # is part of the header but is not needed to decode the samples.
        num_samples, _sample_period, sample_size, param_kind = unpack(
            '>iihh', in_f.read(12))
        # 0o2000 is accepted by Python 2.6+ and Python 3 alike.
        compressed = bool(param_kind & 0o2000)

        # Build the per-sample format once instead of on every iteration.
        if compressed:
            num_values = sample_size // 2
            coeff_fmt = '>' + 'f' * num_values
            coeff_bytes = 4 * num_values
            scale = array(unpack(coeff_fmt, in_f.read(coeff_bytes)),
                          dtype=FLOAT_TYPE)
            bias = array(unpack(coeff_fmt, in_f.read(coeff_bytes)),
                         dtype=FLOAT_TYPE)
            # The first 4 samples were the two matrices.
            num_samples -= 4
            sample_fmt = '>' + 'h' * num_values
        else:
            sample_fmt = '>' + 'f' * (sample_size // 4)

        for _ in range(num_samples):
            values = array(unpack(sample_fmt, in_f.read(sample_size)),
                           dtype=FLOAT_TYPE)
            if compressed:
                values = (values + bias) / scale
            yield values
Solution
# Read the whole 12-byte header at once and decode its four big-endian
# fields (two int32 followed by two int16) with a single unpack call.
data = in_f.read(12)
num_samples, sample_period, sample_size, param_kind = unpack('>iihh', data)
A = B = 0
# NOTE(review): `compressed` is not assigned in this excerpt; presumably it is
# still derived from param_kind as in the question's code - confirm.
if compressed:
# NOTE(review): array('f') followed by fromfile() looks like the standard
# library array.array, not the numpy `array` imported in the question -
# confirm which import is intended.
A = array('f')
# NOTE(review): array.fromfile() reads values in the machine's native byte
# order, while the question decodes big-endian floats ('>f'); a byteswap()
# may be needed on little-endian hosts - confirm.
A.fromfile(in_f, sample_size/2)
B = array('f')
B.fromfile(in_f, sample_size/2)
#The first 4 samples were the matrices
num_samples -= 4
And so on.
Code Snippets
# Read the whole 12-byte header at once and decode its four big-endian
# fields (two int32 followed by two int16) with a single unpack call.
data = in_f.read(12)
num_samples, sample_period, sample_size, param_kind = unpack('>iihh', data)
A = B = 0
# NOTE(review): `compressed` is not assigned in this excerpt; presumably it is
# still derived from param_kind as in the question's code - confirm.
if compressed:
# NOTE(review): array('f') followed by fromfile() looks like the standard
# library array.array, not the numpy `array` imported in the question -
# confirm which import is intended.
A = array('f')
# NOTE(review): array.fromfile() reads values in the machine's native byte
# order, while the question decodes big-endian floats ('>f'); a byteswap()
# may be needed on little-endian hosts - confirm.
A.fromfile(in_f, sample_size/2)
B = array('f')
B.fromfile(in_f, sample_size/2)
#The first 4 samples were the matrices
num_samples -= 4
Context
StackExchange Code Review Q#1496, answer score: 3
Revisions (0)
No revisions yet.