This file is indexed.

/usr/share/pyshared/biom/parse.py is in python-biom-format 1.1.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
#!/usr/bin/env python

from __future__ import division
from biom.exception import BiomParseException
from biom.table import SparseOTUTable, DenseOTUTable, SparsePathwayTable, \
        DensePathwayTable, SparseFunctionTable, DenseFunctionTable, \
        SparseOrthologTable, DenseOrthologTable, SparseGeneTable, \
        DenseGeneTable, SparseMetaboliteTable, DenseMetaboliteTable,\
        SparseTaxonTable, DenseTaxonTable, table_factory, to_sparse,\
        nparray_to_sparseobj, SparseObj
from biom.csmat import CSMat
import json
from numpy import zeros, asarray, uint32, float64
from string import strip

__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2012, BIOM-Format Project"
__credits__ = ["Justin Kuczynski", "Daniel McDonald", "Greg Caporaso", "Jose Carlos Clemente Litran"]
__license__ = "GPL"
__url__ = "http://biom-format.org"
__version__ = "1.1.2"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"

MATRIX_ELEMENT_TYPE = {'int':int,'float':float,'unicode':unicode,
                      u'int':int,u'float':float,u'unicode':unicode}

QUOTE = '"'
JSON_OPEN = set(["[", "{"])
JSON_CLOSE = set(["]", "}"])
JSON_SKIP = set([" ","\t","\n",","])
JSON_START = set(["0","1","2","3","4","5","6","7","8","9","{","[",'"'])
def direct_parse_key(biom_str, key):
    """Returns key:value from the biom string, or ""
    
    This method pulls an arbitrary key/value pair out from a BIOM string
    """
    base_idx = biom_str.find('"%s":' % key)
    if base_idx == -1:
        return ""
    else:
        start_idx = base_idx + len(key) + 3 # shift over "key":

    # find the start token
    cur_idx = start_idx 
    while biom_str[cur_idx] not in JSON_START:  
        cur_idx += 1
    
    if biom_str[cur_idx] not in JSON_OPEN:
        # do we have a number?
        while biom_str[cur_idx] not in [",","{","}"]:
            cur_idx += 1
            
    else:
        # we have an object
        stack = [biom_str[cur_idx]]
        cur_idx += 1
        while stack:
            cur_char = biom_str[cur_idx]
            
            if cur_char == QUOTE:
                if stack[-1] == QUOTE:
                    stack.pop()
                else:
                    stack.append(cur_char)
            elif cur_char in JSON_CLOSE:
                try:
                    stack.pop()
                except IndexError: # got an int or float?
                    cur_idx -= 1
                    break
            elif cur_char in JSON_OPEN:
                stack.append(cur_char)
            cur_idx += 1

    return biom_str[base_idx:cur_idx]

def direct_slice_data(biom_str, to_keep, axis):
    """Pull out specific slices from a BIOM string

    biom_str : JSON-formatted BIOM string
    to_keep  : indices to keep
    axis     : either 'samples' or 'observations'
    
    Will raise IndexError if the inices are out of bounds. Fully zerod rows
    or columns are possible and this is _not_ checked. 
    """
    if axis not in ['observations','samples']:
        raise IndexError, "Unknown axis type"

    # it would be nice if all of these lookups could be done in a single
    # traversal of biom_str, but it likely is at the cost of code complexity
    shape_kv_pair = direct_parse_key(biom_str, "shape")
    if shape_kv_pair == "":
        raise ValueError, "biom_str does not appear to be in BIOM format!"

    data_fields = direct_parse_key(biom_str, "data")
    if data_fields == "":
        raise ValueError, "biom_str does not appear to be in BIOM format!"

    matrix_type_kv_pair = direct_parse_key(biom_str, "matrix_type")
    if matrix_type_kv_pair == "":
        raise ValueError, "biom_str does not appear to be in BIOM format!"

    # determine shape
    raw_shape = shape_kv_pair.split(':')[-1].replace("[","").replace("]","")
    n_rows, n_cols = map(int, raw_shape.split(","))
   
    # slice to just data
    data_start = data_fields.find('[') +1
    data_fields = data_fields[data_start:len(data_fields)-1] # trim trailing ]
    
    # determine matrix type
    matrix_type = matrix_type_kv_pair.split(':')[-1].strip()

    # bounds check
    if min(to_keep) < 0:
        raise IndexError, "Observations to keep are out of bounds!"
    
    # more bounds check and set new shape
    new_shape = "[%d, %d]"
    if axis == 'observations':
        if max(to_keep) >= n_rows:
            raise IndexError, "Observations to keep are out of bounds!"
        new_shape = new_shape % (len(to_keep), n_cols)
    elif axis == 'samples':
        if max(to_keep) >= n_cols:
            raise IndexError, "Samples to keep are out of bounds!"
        new_shape = new_shape % (n_rows, len(to_keep))

    to_keep = set(to_keep) 
    new_data = []

    if matrix_type == '"dense"':
        if axis == 'observations':
            new_data = _direct_slice_data_dense_obs(data_fields, to_keep)
        elif axis == 'samples':
            new_data = _direct_slice_data_dense_samp(data_fields, to_keep)
    elif matrix_type == '"sparse"':
        if axis == 'observations':
            new_data = _direct_slice_data_sparse_obs(data_fields, to_keep)
        elif axis == 'samples':
            new_data = _direct_slice_data_sparse_samp(data_fields, to_keep)
    else:
        raise ValueError, "biom_str does not appear to be in BIOM format!"
    
    return '"data": %s, "shape": %s' % (new_data, new_shape)

STRIP_F = lambda x: x.strip("[] \n\t")
def _remap_axis_sparse_obs(rcv, lookup):
    """Remap a sparse observation axis"""
    row,col,value = map(STRIP_F, rcv.split(','))
    return "%s,%s,%s" % (lookup[row], col, value)    

def _remap_axis_sparse_samp(rcv, lookup):
    """Remap a sparse sample axis"""
    row,col,value = map(STRIP_F, rcv.split(','))
    return "%s,%s,%s" % (row, lookup[col], value)    

def _direct_slice_data_dense_obs(data, to_keep):
    """slice observations from data
    
    data : raw data string from a biom file
    to_keep : rows to keep
    """
    new_data = []
    for row_count, row in enumerate(data.split('],')):
        if row_count in to_keep:
            new_data.append(STRIP_F(row))
    return '[[%s]]' % '],['.join(new_data)

def _direct_slice_data_dense_samp(data, to_keep):
    """slice samples from data
    
    data : raw data string from a biom file
    to_keep : columns to keep
    """
    new_data = []
    for row in data.split('],'):
        new_row = []
        # dive into the cols and keep those specified
        for col_idx,v in enumerate(row.split(',')):
            if col_idx in to_keep:
                new_row.append(STRIP_F(v))
        new_data.append("%s" % ','.join(new_row))
    return '[[%s]]' % '],['.join(new_data)

def _direct_slice_data_sparse_obs(data, to_keep):
    """slice observations from data
    
    data : raw data string from a biom file
    to_keep : rows to keep
    """
    # interogate all the datas
    new_data = []
    remap_lookup = dict([(str(v),i) for i,v in enumerate(sorted(to_keep))])
    for rcv in data.split('],'):
        r,c,v = STRIP_F(rcv).split(',')
        if r in remap_lookup:
            new_data.append(_remap_axis_sparse_obs(rcv, remap_lookup))
    return '[[%s]]' % '],['.join(new_data)

def _direct_slice_data_sparse_samp(data, to_keep):
    """slice samples from data
    
    data : raw data string from a biom file
    to_keep : columns to keep
    """
    # could do sparse obs/samp in one forloop, but then theres the 
    # expense of the additional if-statement in the loop
    new_data = []
    remap_lookup = dict([(str(v),i) for i,v in enumerate(sorted(to_keep))])
    for rcv in data.split('],'):
        r,c,v = rcv.split(',')
        if c in remap_lookup:
            new_data.append(_remap_axis_sparse_samp(rcv, remap_lookup))
    return '[[%s]]' % '],['.join(new_data)

def get_axis_indices(biom_str, to_keep, axis):
    """Returns the indices for the associated ids to keep

    biom_str : a BIOM formatted JSON string
    to_keep  : a list of IDs to get indices for
    axis     : either 'samples' or 'observations'
    
    Raises KeyError if unknown key is specified
    """
    to_keep = set(to_keep)
    if axis == 'observations':
        axis_key = 'rows'
        axis_data = direct_parse_key(biom_str, axis_key)
    elif axis == "samples":
        axis_key = 'columns'
        axis_data = direct_parse_key(biom_str, axis_key)
    else:
        raise ValueError, "Unknown axis!"

    if axis_data == "":
        raise ValueError, "biom_str does not appear to be in BIOM format!"

    axis_data = json.loads("{%s}" % axis_data)

    all_ids = set([v['id'] for v in axis_data[axis_key]])
    if not to_keep.issubset(all_ids):
        raise KeyError, "Not all of the to_keep ids are in biom_str!"

    idxs = [i for i,v in enumerate(axis_data[axis_key]) if v['id'] in to_keep]
    idxs_lookup = set(idxs)

    subset = {axis_key:[]}
    for i,v in enumerate(axis_data[axis_key]):
        if i in idxs_lookup:
            subset[axis_key].append(v)

    return idxs, json.dumps(subset)[1:-1] # trim off { and }

def light_parse_biom_sparse(biom_str, constructor):
    """Light-weight BIOM parser for sparse objects

    Constructor must match the loaded table type
    """
    if constructor._biom_matrix_type != "sparse":
        raise AttributeError, "Constructor must be sparse!"

    # is data: separated by a space?
    data_start = biom_str.find('"data":')
    if biom_str[data_start + 7] == " ":
        start_idx = data_start + 8
    else:
        start_idx = data_start + 7

    end_idx = biom_str[start_idx:].find(']]') + start_idx
    data = biom_str[start_idx:end_idx]
    new_s = biom_str[:start_idx]
    new_s += '[[0, 0, 1]]'
    new_s += biom_str[(end_idx + 2):]
    
    # get shape
    start_idx = biom_str.find('"shape":') + 10
    end_idx = biom_str[start_idx:start_idx + 30].find('],') + start_idx
    row, col = map(int, biom_str[start_idx:end_idx].replace('[','').split(', '))
    data_mat = SparseObj(row, col)

    for rec in data.replace('[','').split('],'):
        try:
            r,c,v = rec.split(',')
        except:
            raise TypeError, "Data do not appear sparse!"
            
        data_mat[uint32(r),uint32(c)] = float64(v)

    t = parse_biom_table_str(new_s, constructor, data_pump=data_mat)

    return t

def parse_biom_table(json_fh,constructor=None, try_light_parse=True):
    """parses a biom format otu table into a rich otu table object

    input is an open filehandle or compatable object (e.g. list of lines)

    sparse/dense will be determined by "matrix_type" in biom file, and 
    either a SparseOTUTable or DenseOTUTable object will be returned
    note that sparse here refers to the compressed format of [row,col,count]
    dense refers to the full / standard matrix representations

    If try_light_parse is True, the light_parse_biom_sparse call will be 
    attempted. If that parse fails, the code will fall back to the regular
    BIOM parser.
    """
    table_str = ''.join(json_fh)

    if try_light_parse:
        try:
            t = light_parse_biom_sparse(table_str, constructor)
        except:
            t = parse_biom_table_str(table_str, constructor=constructor)
    else: 
        t = parse_biom_table_str(table_str, constructor=constructor)
    return t

def pick_constructor(mat_type, table_type, constructor, valid_constructors):
    """Make sure constructor is sane, attempt to pick one if not specified

    Excepts valid_constructors to be a list in the order of 
    [SparseTable, DenseTable] in which the objects present must subclass the
    objects respectively (eg [SparseOTUTable, DenseOTUTable])

    We do not require the matrix type to be the same as the constructor if the 
    passed in constructor is not None. The motivation is that there are use
    cases for taking a table stored as dense but loaded as sparse.

    Will raise BiomParseError if input_mat_type appears wrong or if the 
    specified constructor appears to be incorrect
    """
    if constructor is None:
        if mat_type.lower() == 'sparse':
            constructor = valid_constructors[0]
        elif mat_type.lower() == 'dense':
            constructor = valid_constructors[1]
        else:
            raise BiomParseException, "Unknown matrix_type"

    if constructor._biom_type.lower() != table_type.lower():
        raise BiomParseException, "constructor must be a biom %s" % table_type

    return constructor

def parse_biom_otu_table(json_table, constructor=None, data_pump=None):
    """Parse a biom otu table type

    Constructor must have a _biom_type of "otu table"
    """
    table_type = 'otu table'
    mat_type = json_table['matrix_type']
    constructors = [SparseOTUTable, DenseOTUTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor, 
                                  shape=json_table['shape'], 
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor, 
                                  shape=json_table['shape'], 
                                  dtype=dtype)

    return table_obj

def parse_biom_pathway_table(json_table, constructor=None, data_pump=None):
    """Parse a biom pathway table
    
    Constructor must have a _biom_type of "pathway table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'pathway table'
    constructors = [SparsePathwayTable, DensePathwayTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:    
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)

    return table_obj

def parse_biom_function_table(json_table, constructor=None, data_pump=None):
    """Parse a biom function table
    
    Constructor must have a _biom_type of "function table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'function table'
    constructors = [SparseFunctionTable, DenseFunctionTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)

    return table_obj

def parse_biom_ortholog_table(json_table, constructor=None, data_pump=None):
    """Parse a biom ortholog table

    Constructor must have a _biom_type of "ortholog table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'ortholog table'
    constructors = [SparseOrthologTable, DenseOrthologTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)

    return table_obj

def parse_biom_gene_table(json_table, constructor=None, data_pump=None):
    """Parse a biom gene table
    
    Constructor must have a _biom_type of "gene table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'gene table'
    constructors = [SparseGeneTable, DenseGeneTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    return table_obj

def parse_biom_metabolite_table(json_table, constructor=None, data_pump=None):
    """Parse a biom metabolite table

    Constructor must have a _biom_type of "metabolite table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'metabolite table'
    constructors = [SparseMetaboliteTable, DenseMetaboliteTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    return table_obj

def parse_biom_taxon_table(json_table, constructor=None, data_pump=None):
    """Parse a biom taxon table

    Constructor must have a _biom_type of "taxon table"
    """
    mat_type = json_table['matrix_type']
    table_type = 'taxon table'
    constructors = [SparseTaxonTable, DenseTaxonTable]
    constructor = pick_constructor(mat_type,table_type,constructor,constructors)

    sample_ids = [col['id'] for col in json_table['columns']]
    sample_metadata = [col['metadata'] for col in json_table['columns']]
    obs_ids = [row['id'] for row in json_table['rows']]
    obs_metadata = [row['metadata'] for row in json_table['rows']]
    dtype = MATRIX_ELEMENT_TYPE[json_table['matrix_element_type']]

    if data_pump is None:
        table_obj = table_factory(json_table['data'], sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)
    else:
        table_obj = table_factory(data_pump, sample_ids, obs_ids, 
                                  sample_metadata, obs_metadata, 
                                  constructor=constructor,
                                  shape=json_table['shape'],
                                  dtype=dtype)

    return table_obj

# map table types -> parsing methods
BIOM_TYPES = {'otu table':parse_biom_otu_table,
              'pathway table':parse_biom_pathway_table,
              'function table':parse_biom_function_table,
              'ortholog table':parse_biom_ortholog_table,
              'gene table':parse_biom_gene_table,
              'metabolite table':parse_biom_metabolite_table,
              'taxon table':parse_biom_taxon_table}

def parse_biom_table_str(json_str,constructor=None, data_pump=None):
    """Parses a JSON string of the Biom table into a rich table object.
   
    If constructor is none, the constructor is determined based on BIOM
    information

    data_pump is to allow the injection of a pre-parsed data object
    """
    json_table = json.loads(json_str)

    if constructor is None:
        f = BIOM_TYPES.get(json_table['type'].lower(), None)
    else:
        f = BIOM_TYPES.get(constructor._biom_type.lower(), None)

        # convert matrix data if the biom type doesn't match matrix type
        # of the table objects
        if constructor._biom_matrix_type != json_table['matrix_type'].lower():
            if json_table['matrix_type'] == 'dense':
                # dense -> sparse
                conv_data = []
                for row_idx,row in enumerate(json_table['data']):
                    for col_idx, value in enumerate(row):
                        if value == 0:
                            continue
                        conv_data.append([row_idx,col_idx,value])
                json_table['data'] = conv_data

            elif json_table['matrix_type'] == 'sparse':
                # sparse -> dense
                conv_data = zeros(json_table['shape'],dtype=float)
                for r,c,v in json_table['data']:
                    conv_data[r,c] = v
                json_table['data'] = [list(row) for row in conv_data]

            else:
                raise BiomParseException, "Unknown matrix_type"

    if f is None:
        raise BiomParseException, 'Unknown table type'

    return f(json_table, constructor, data_pump)

OBS_META_TYPES = {'sc_separated': lambda x: [e.strip() for e in x.split(';')],
                  'naive': lambda x: x
                  }
OBS_META_TYPES['taxonomy'] = OBS_META_TYPES['sc_separated']

def parse_classic_table_to_rich_table(lines, sample_mapping, obs_mapping, process_func,
        constructor, **kwargs):
    """Parses an table (tab delimited) (observation x sample)

    sample_mapping : can be None or {'sample_id':something}
    obs_mapping : can be none or {'observation_id':something}
    """
    sample_ids, obs_ids, data, t_md, t_md_name = parse_classic_table(lines, 
                                                        **kwargs)

    # if we have it, keep it
    if t_md is None:
        obs_metadata = None
    else:
        obs_metadata = [{t_md_name:process_func(v)} for v in t_md]

    if sample_mapping is None:
        sample_metadata = None
    else:
        sample_metadata = [sample_mapping[sample_id] for sample_id in sample_ids]

    # will override any metadata from parsed table
    if obs_mapping is not None:
        obs_metadata = [obs_mapping[obs_id] for obs_id in obs_ids]

    if constructor._biom_matrix_type == 'sparse':
        data = nparray_to_sparseobj(data)
    
    return table_factory(data, sample_ids, obs_ids, sample_metadata, 
                         obs_metadata, constructor=constructor)

def parse_classic_table(lines, delim='\t', dtype=float, header_mark=None, \
        md_parse=None):
    """Parse a classic table into (sample_ids, obs_ids, data, metadata, md_name)

    If the last column does not appear to be numeric, interpret it as 
    observation metadata, otherwise None.

    md_name is the column name for the last column if non-numeric

    NOTE: this is intended to be close to how QIIME classic OTU tables are
    parsed with the exception of the additional md_name field
    """
    if not isinstance(lines, list):
        try:
            lines = lines.readlines()
        except AttributeError:
            raise BiomParseException, "Input needs to support readlines or be indexable"

    # find header, the first line that is not empty and does not start with a #
    for idx,l in enumerate(lines):
        if not l.strip():
            continue
        if not l.startswith('#'):
            break
        if header_mark and l.startswith(header_mark):
            break

    if idx == 0:
        data_start = 1
        header = lines[0].strip().split(delim)[1:]
    else:
        if header_mark is not None:
            data_start = idx + 1
            header = lines[idx].strip().split(delim)[1:]
        else:
            data_start = idx
            header = lines[idx-1].strip().split(delim)[1:]

    # attempt to determine if the last column is non-numeric, ie, metadata
    first_values = lines[data_start].strip().split(delim)
    last_value = first_values[-1]
    last_column_is_numeric = True

    if '.' in last_value:
        try:
            float(last_value)
        except ValueError:
            last_column_is_numeric = False
    else:
        try:
            int(last_value)
        except ValueError:
            last_column_is_numeric = False
    
    # determine sample ids
    if last_column_is_numeric:
        md_name = None
        metadata = None
        samp_ids = header[:]
    else:
        md_name = header[-1]
        metadata = []
        samp_ids = header[:-1]
    
    data = []
    obs_ids = []
    for line in lines[data_start:]:
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            continue

        fields = line.strip().split(delim)
        obs_ids.append(fields[0])

        if last_column_is_numeric:
            values = map(dtype, fields[1:])
        else:
            values = map(dtype, fields[1:-1])

            if md_parse is not None:
                metadata.append(md_parse(fields[-1]))
            else:
                metadata.append(fields[-1])

        data.append(values)

    return samp_ids, obs_ids, asarray(data), metadata, md_name

def parse_mapping(lines, 
                  strip_quotes=True, 
                  suppress_stripping=False,
                  header = None,
                  process_fns=None):
    """Parser for map file that relates samples or observations to metadata.
    
    Format: header line with fields
            optionally other comment lines starting with #
            tab-delimited fields
    
    process_fns: a dictionary of functions to apply to metadata categories. 
     the keys should be the column headings, and the values should be
     functions which take a single value. For example, if the values in a 
     column called "taxonomy" should be split on semi-colons before being 
     added as metadata, and all other columns should be left as-is, 
     process_fns should be:
      {'taxonomy': lambda x: x.split(';')}

    Result: {first_column:{column_i:value}}, where i > 0

    Assumes the first column in the mapping file is the id

    NOTE: code pulled and modified from QIIME (http://qiime.org)
    """
    if hasattr(lines,"upper"):
        # Try opening if a string was passed
        try:
            lines = open(lines,'U')
        except IOError:
            raise BiomParseException,\
             ("A string was passed that doesn't refer "
              "to an accessible filepath.")
    
    if strip_quotes:
        if suppress_stripping:
            # remove quotes but not spaces
            strip_f = lambda x: x.replace('"','')
        else:
            # remove quotes and spaces
            strip_f = lambda x: x.replace('"','').strip()
    else:
        if suppress_stripping:
            # don't remove quotes or spaces
            strip_f = lambda x: x
        else:
            # remove spaces but not quotes
            strip_f = lambda x: x.strip()
    
    # if the user didn't provide process functions, initialize as 
    # an empty dict
    if process_fns == None:
        process_fns = {}

    # Create lists to store the results
    mapping_data = []
    header = header or []
    comments = []

    # Begin iterating over lines
    for line in lines:
        line = strip_f(line)
        if not line or (suppress_stripping and not line.strip()):
            # skip blank lines when not stripping lines
            continue

        if line.startswith('#'):
            line = line[1:]
            if not header:
                header = line.strip().split('\t')
            else:
                comments.append(line)
        else:
            # Will add empty string to empty fields
            tmp_line = map(strip_f, line.split('\t'))
            if len(tmp_line)<len(header):
                tmp_line.extend(['']*(len(header)-len(tmp_line)))
            mapping_data.append(tmp_line)

    if not header:
        raise BiomParseException, "No header line was found in mapping file."
    if not mapping_data:
        raise BiomParseException, "No data found in mapping file."

    first_col = [i[0] for i in mapping_data]
    if len(first_col) != len(set(first_col)):
        raise BiomParseException, "First column values are not unique! Cannot be ids."

    mapping = {}
    for vals in mapping_data:
        current_d = {}
        for k,v in zip(header[1:], vals[1:]):
            try:
                current_d[k] = process_fns[k](v)
            except KeyError:
                current_d[k] = v
        mapping[vals[0]] = current_d

    return mapping

def generatedby():
    """Returns a generated by string"""
    return 'BIOM-Format %s' % __version__

def convert_table_to_biom(table_f,sample_mapping, obs_mapping, process_func, constructor,
                          **kwargs):
    """Convert a contigency table to a biom table
    
    sample_mapping : dict of {'sample_id':metadata} or None
    obs_mapping : dict of {'obs_id':metadata} or None
    process_func: a function to transform observation metadata
    constructor : a biom table type
    dtype : type of table data
    """
    otu_table = parse_classic_table_to_rich_table(table_f, sample_mapping, 
                                                  obs_mapping, process_func,
                                                  constructor, **kwargs)
    return otu_table.getBiomFormatJsonString(generatedby())

def convert_biom_to_table(biom_f, header_key=None, header_value=None, \
        md_format=None):
    """Convert a biom table to a contigency table"""
    table = parse_biom_table(biom_f)

    if md_format is None:
        md_format = lambda x: '; '.join(x)

    if table.ObservationMetadata is None:
        return table.delimitedSelf()
    
    if header_key in table.ObservationMetadata[0]:
        return table.delimitedSelf(header_key=header_key, 
                                       header_value=header_value,
                                       metadata_formatter=md_format)
    else:
        return table.delimitedSelf()