# Source: /usr/share/pyshared/neo/io/klustakwikio.py (python-neo 0.2.0-1)
"""Reading and writing from KlustaKwik-format files.
Ref: http://klusters.sourceforge.net/UserManual/data-files.html
Supported : Read, Write
Author : Chris Rodgers
TODO:
* When reading, put the Unit into the RCG, RC hierarchy
* When writing, figure out how to get group and cluster if those annotations
weren't set. Consider removing those annotations if they are redundant.
* Load features in addition to spiketimes.
"""
# I need to subclass BaseIO
from .baseio import BaseIO
# to import : Block, Segment, AnalogSignal, SpikeTrain, SpikeTrainList
from ..core import *
from .tools import create_many_to_one_relationship
# note neo.core need only numpy and quantitie
import numpy as np
import quantities as pq
# Pasted version of feature file format spec
"""
The Feature File
Generic file name: base.fet.n
Format: ASCII, integer values
The feature file lists for each spike the PCA coefficients for each
electrode, followed by the timestamp of the spike (more features can
be inserted between the PCA coefficients and the timestamp).
The first line contains the number of dimensions.
Assuming N1 spikes (spike1...spikeN1), N2 electrodes (e1...eN2) and
N3 coefficients (c1...cN3), this file looks like:
nbDimensions
c1_e1_spike1 c2_e1_spike1 ... cN3_e1_spike1 c1_e2_spike1 ... cN3_eN2_spike1 timestamp_spike1
c1_e1_spike2 c2_e1_spike2 ... cN3_e1_spike2 c1_e2_spike2 ... cN3_eN2_spike2 timestamp_spike2
...
c1_e1_spikeN1 c2_e1_spikeN1 ... cN3_e1_spikeN1 c1_e2_spikeN1 ... cN3_eN2_spikeN1 timestamp_spikeN1
The timestamp is expressed in multiples of the sampling interval. For
instance, for a 20kHz recording (50 microsecond sampling interval), a
timestamp of 200 corresponds to 200x0.000050s=0.01s from the beginning
of the recording session.
Notice that the last line must end with a newline or carriage return.
"""
import glob
import logging
import os.path
import re
import shutil

import matplotlib.mlab as mlab
import numpy as np
class KlustaKwikIO(BaseIO):
    """Reading and writing from KlustaKwik-format files.

    The KlustaKwik format stores spike times (in samples) and cluster ids
    in plain-text `.fet.N` / `.clu.N` files, one pair per electrode group N.
    This IO reads them into a single-segment Block and writes a Block back
    out, combining all segments (the format has no segment boundaries).
    """
    # Class variables demonstrating capabilities of this IO
    is_readable = True
    is_writable = True

    # This IO can only manipulate objects relating to spike times
    supported_objects = [Block, SpikeTrain, Unit]

    # Keep things simple by always returning a block
    readable_objects = [Block]

    # And write a block
    writeable_objects = [Block]

    # Not sure what these do, if anything
    has_header = False
    is_streameable = False

    # GUI params
    read_params = {}
    write_params = {}

    # The IO name and the file extensions it uses
    name = 'KlustaKwik'
    extensions = ['fet', 'clu', 'res', 'spk']

    # Operates on directories
    mode = 'file'

    def __init__(self, filename, sampling_rate=30000.):
        """Create a new IO to operate on a directory

        filename : the directory to contain the files
        basename : string, basename of KlustaKwik format, or None
        sampling_rate : in Hz, necessary because the KlustaKwik files
            stores data in samples.
        """
        BaseIO.__init__(self)

        # Split the user-provided path into containing directory and
        # file basename (the KlustaKwik "base" of base.fet.N etc).
        self.filename, self.basename = os.path.split(os.path.abspath(filename))
        self.sampling_rate = float(sampling_rate)

        # error check
        if not os.path.isdir(self.filename):
            raise ValueError("filename must be a directory")

        # initialize a helper object to parse filenames
        self._fp = FilenameParser(dirname=self.filename, basename=self.basename)

    # The reading methods. The `lazy` and `cascade` parameters are imposed
    # by neo.io API
    def read_block(self, lazy=False, cascade=True):
        """Returns a Block containing spike information.

        There is no obvious way to infer the segment boundaries from
        raw spike times, so for now all spike times are returned in one
        big segment. The way around this would be to specify the segment
        boundaries, and then change this code to put the spikes in the right
        segments.
        """
        # Create block and segment to hold all the data
        block = Block()

        # Search data directory for KlustaKwik files.
        # If nothing found, return empty block
        self._fetfiles = self._fp.read_filenames('fet')
        self._clufiles = self._fp.read_filenames('clu')
        if len(self._fetfiles) == 0 or not cascade:
            return block

        # Create a single segment to hold all of the data
        seg = Segment(name='seg0', index=0, file_origin=self.filename)
        block.segments.append(seg)

        # Load spike times from each group and store in a dict, keyed
        # by group number
        self.spiketrains = dict()
        for group in sorted(self._fetfiles.keys()):
            # Load spike times (in samples) and waveform features
            fetfile = self._fetfiles[group]
            spks, features = self._load_spike_times(fetfile)

            # Load cluster ids or generate
            if group in self._clufiles:
                clufile = self._clufiles[group]
                uids = self._load_unit_id(clufile)
            else:
                # unclustered data, assume all zeros
                uids = np.zeros(spks.shape, dtype=np.int32)

            # error check: fet and clu files must describe the same spikes
            if len(spks) != len(uids):
                raise ValueError("lengths of fet and clu files are different")

            # Create Unit for each cluster
            unique_unit_ids = np.unique(uids)
            for unit_id in sorted(unique_unit_ids):
                # Initialize the unit
                u = Unit(name=('unit %d from group %d' % (unit_id, group)),
                    index=unit_id, group=group)

                # Initialize a new SpikeTrain for the spikes from this unit.
                # Times are converted from samples to seconds here.
                if lazy:
                    st = SpikeTrain(
                        times=[],
                        units='sec', t_start=0.0,
                        t_stop=spks.max() / self.sampling_rate,
                        name=('unit %d from group %d' % (unit_id, group)))
                    st.lazy_shape = len(spks[uids == unit_id])
                else:
                    st = SpikeTrain(
                        times=spks[uids == unit_id] / self.sampling_rate,
                        units='sec', t_start=0.0,
                        t_stop=spks.max() / self.sampling_rate,
                        name=('unit %d from group %d' % (unit_id, group)))
                st.annotations['cluster'] = unit_id
                st.annotations['group'] = group

                # put features in
                if not lazy and len(features) != 0:
                    st.annotations['waveform_features'] = features

                # Link
                u.spiketrains.append(st)
                seg.spiketrains.append(st)

        create_many_to_one_relationship(block)
        return block

    # Helper hidden functions for reading
    def _load_spike_times(self, fetfilename):
        """Reads and returns the spike times and features.

        Returns a tuple (spike_times, features):
            spike_times : 1d int array of spike times in samples
            features    : 2d array, one row of feature values per spike
                          (empty array when the file declares 0 features)
        """
        f = open(fetfilename, 'r')

        # Number of clustering features is integer on first line
        nbFeatures = int(f.readline().strip())

        # Each subsequent line consists of nbFeatures values, followed by
        # the spike time in samples.  Load the remainder as one integer
        # array.  (This replaces the former matplotlib.mlab.csv2rec call,
        # whose `skiprows=1` skipped the first data row even though the
        # header line had already been consumed by readline above.)
        data = np.loadtxt(f, dtype=np.int32)
        f.close()

        # A file containing a single spike loads as a 1d row; normalize
        if data.ndim == 1:
            data = data[np.newaxis, :]

        if nbFeatures == 0:
            # No features declared: the single column is the spike time
            return data[:, -1], np.array([])

        # Last column is the spike time; the rest are the features
        return data[:, -1], data[:, :-1]

    def _load_unit_id(self, clufilename):
        """Reads and return the cluster ids as int32"""
        f = open(clufilename, 'r')

        # Number of clusters on this tetrode is integer on first line
        nbClusters = int(f.readline().strip())

        # Read each cluster name as a string
        cluster_names = f.readlines()
        f.close()

        # Convert names to integers
        # I think the spec requires cluster names to be integers, but
        # this code could be modified to support string names which are
        # auto-numbered.
        try:
            cluster_ids = [int(name) for name in cluster_names]
        except ValueError:
            raise ValueError(
                "Could not convert cluster name to integer in %s" % clufilename)

        # convert to numpy array and error check
        cluster_ids = np.array(cluster_ids, dtype=np.int32)
        if len(np.unique(cluster_ids)) != nbClusters:
            logging.warning("warning: I got %d clusters instead of %d in %s" % (
                len(np.unique(cluster_ids)), nbClusters, clufilename))

        return cluster_ids

    # writing functions
    def write_block(self, block):
        """Write spike times and unit ids to disk.

        Currently descends hierarchy from block to segment to spiketrain.
        Then gets group and cluster information from spiketrain.
        Then writes the time and cluster info to the file associated with
        that group.

        The group and cluster information are extracted from annotations,
        eg `sptr.annotations['group']`. If no cluster information exists,
        it is assigned to cluster 0.

        Note that all segments are essentially combined in
        this process, since the KlustaKwik format does not allow for
        segment boundaries.

        As implemented currently, does not use the `Unit` object at all.

        We first try to use the sampling rate of each SpikeTrain, or if this
        is not set, we use `self.sampling_rate`.

        If the files already exist, backup copies are created by appending
        the filenames with a "~".
        """
        # set basename
        if self.basename is None:
            logging.warning("warning: no basename provided, using `basename`")
            self.basename = 'basename'

        # First create file handles for each group which will be stored
        self._make_all_file_handles(block)

        # We'll detect how many features belong in each group
        self._group2features = {}

        # Iterate through segments in this block
        for seg in block.segments:
            # Write each spiketrain of the segment
            for st in seg.spiketrains:
                # Get file handles for this spiketrain using its group
                group = self.st2group(st)
                fetfilehandle = self._fetfilehandles[group]
                clufilehandle = self._clufilehandles[group]

                # Get the id to write to clu file for this spike train
                cluster = self.st2cluster(st)

                # Choose sampling rate to convert to samples
                try:
                    sr = st.annotations['sampling_rate']
                except KeyError:
                    sr = self.sampling_rate

                # Convert times (seconds) to integer samples.
                # (`int`, not the removed alias `np.int`.)
                spike_times_in_samples = np.rint(
                    np.array(st) * sr).astype(int)

                # Try to get features from spiketrain
                try:
                    all_features = st.annotations['waveform_features']
                except KeyError:
                    # Use empty
                    all_features = [
                        [] for n in range(len(spike_times_in_samples))]
                all_features = np.asarray(all_features)
                if all_features.ndim != 2:
                    raise ValueError("waveform features should be 2d array")

                # Check number of features we're supposed to have
                try:
                    n_features = self._group2features[group]
                except KeyError:
                    # First time through .. set number of features
                    n_features = all_features.shape[1]
                    self._group2features[group] = n_features

                    # and write to first line of file
                    fetfilehandle.write("%d\n" % n_features)
                if n_features != all_features.shape[1]:
                    raise ValueError("inconsistent number of features: " +
                        "supposed to be %d but I got %d" %\
                        (n_features, all_features.shape[1]))

                # Write features and time for each spike
                for stt, features in zip(spike_times_in_samples, all_features):
                    # first features
                    for val in features:
                        fetfilehandle.write(str(val))
                        fetfilehandle.write(" ")

                    # now time
                    fetfilehandle.write("%d\n" % stt)

                    # and cluster id
                    clufilehandle.write("%d\n" % cluster)

        # We're done, so close the files
        self._close_all_files()

    # Helper functions for writing
    def st2group(self, st):
        # Not sure this is right so make it a method in case we change it
        try:
            return st.annotations['group']
        except KeyError:
            return 0

    def st2cluster(self, st):
        # Not sure this is right so make it a method in case we change it
        try:
            return st.annotations['cluster']
        except KeyError:
            return 0

    def _make_all_file_handles(self, block):
        """Get the tetrode (group) of each neuron (cluster) by descending
        the hierarchy through segment and block.
        Store in a dict {group_id: list_of_clusters_in_that_group}
        """
        group2clusters = {}
        for seg in block.segments:
            for st in seg.spiketrains:
                group = self.st2group(st)
                cluster = self.st2cluster(st)

                if group in group2clusters:
                    if cluster not in group2clusters[group]:
                        group2clusters[group].append(cluster)
                else:
                    group2clusters[group] = [cluster]

        # Make new file handles for each group
        self._fetfilehandles, self._clufilehandles = {}, {}
        for group, clusters in group2clusters.items():
            self._new_group(group, nbClusters=len(clusters))

    def _new_group(self, id_group, nbClusters):
        """Open fresh .fet/.clu handles for `id_group`, backing up any
        existing files by copying them to name + '~'."""
        # generate filenames
        fetfilename = os.path.join(self.filename,
            self.basename + ('.fet.%d' % id_group))
        clufilename = os.path.join(self.filename,
            self.basename + ('.clu.%d' % id_group))

        # back up before overwriting
        if os.path.exists(fetfilename):
            shutil.copyfile(fetfilename, fetfilename + '~')
        if os.path.exists(clufilename):
            shutil.copyfile(clufilename, clufilename + '~')

        # create file handles
        self._fetfilehandles[id_group] = open(fetfilename, 'w')
        self._clufilehandles[id_group] = open(clufilename, 'w')

        # write out first line: number of clusters in this group.
        # (The .fet header line is deferred to write_block, which knows
        # the feature count.)
        self._clufilehandles[id_group].write("%d\n" % nbClusters)

    def _close_all_files(self):
        """Close every open .fet and .clu handle."""
        for val in self._fetfilehandles.values(): val.close()
        for val in self._clufilehandles.values(): val.close()
class FilenameParser:
    """Simple class to interpret user's requests into KlustaKwik filenames"""
    def __init__(self, dirname, basename=None):
        """Initialize a new parser for a directory containing files

        dirname: directory containing files
        basename: basename in KlustaKwik format spec

        If basename is left None, the basename of the first matching file
        found by `read_filenames` is adopted; files with other basenames
        in the same directory are then ignored.

        Raises ValueError if `dirname` is not a directory.
        """
        self.dirname = os.path.normpath(dirname)
        self.basename = basename

        # error check
        if not os.path.isdir(self.dirname):
            raise ValueError("filename must be a directory")

    def read_filenames(self, typestring='fet'):
        """Returns filenames in the data directory matching the type.

        Generally, `typestring` is one of the following:
            'fet', 'clu', 'spk', 'res'

        Returns a dict {group_number: filename}, e.g.:
            {   0: 'basename.fet.0',
                1: 'basename.fet.1',
                2: 'basename.fet.2'}

        'basename' can be any string not containing whitespace.
        Only filenames that begin with "basename.typestring." and end with
        a sequence of digits are valid. The digits are converted to an integer
        and used as the group number.
        """
        all_filenames = glob.glob(os.path.join(self.dirname, '*'))

        # Match any whitespace-free basename followed by .typestring.<digits>.
        # (\S+ rather than \w+ so that basenames containing dots or hyphens
        # are accepted, as the docstring promises; using the `re` module
        # directly rather than reaching into glob's internal `re` import.)
        pattern = re.compile(r'^(\S+)\.%s\.(\d+)$' % typestring)

        # Fill the dict with valid filenames
        d = {}
        for v in all_filenames:
            # Test whether matches format, ie ends with digits
            split_fn = os.path.split(v)[1]
            m = pattern.search(split_fn)
            if m is not None:
                # get basename from first hit if not specified
                if self.basename is None:
                    self.basename = m.group(1)

                # return files with correct basename
                if self.basename == m.group(1):
                    # Key the group number to the filename
                    # This conversion to int should always work since only
                    # strings of digits will match the regex
                    tetn = int(m.group(2))
                    d[tetn] = v

        return d
|