/usr/share/mmass/mspy/mod_proteo.py is in mmass 5.1.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 | # -------------------------------------------------------------------------
# Copyright (C) 2005-2012 Martin Strohalm <www.mmass.org>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# Complete text of GNU GPL can be found in the file LICENSE.TXT in the
# main directory of the program.
# -------------------------------------------------------------------------
# load libs
import re
import itertools
# load stopper
from mod_stopper import CHECK_FORCE_QUIT
# load building blocks
import blocks
# load objects
import obj_sequence
# SEQUENCE DIGESTION
# ------------------
def digest(sequence, enzyme, miscleavage=0, allowMods=False, strict=True):
    """Digest sequence by specified enzyme.
        sequence: (sequence) mspy sequence object
        enzyme: (str) enzyme name - must be a key of blocks.enzymes
        miscleavage: (int) number of allowed miscleavages
        allowMods: (bool) do not care about modifications in cleavage site
        strict: (bool) forwarded to sequence.ismodified(); per the original
            note, do not cleave even if variable modification is in cleavage site
        returns: (list) peptides sliced from the sequence, each with its
            'miscleavages' attribute set; empty list for an empty chain
        raises: TypeError for non-sequence, non-amino or cyclic input,
            KeyError for unknown enzyme
    """

    # check sequence object
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError("Cannot digest non-sequence object!")

    # check chain type
    if sequence.chainType != 'aminoacids':
        raise TypeError('Digest function is not supported for non-amino sequences!')

    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError('Digest function is not supported for cyclic peptides!')

    # nothing to digest
    if not sequence.chain:
        return []

    # get enzyme definition
    if enzyme not in blocks.enzymes:
        raise KeyError('Unknown enzyme! -> ' + enzyme)
    enzyme = blocks.enzymes[enzyme]

    # the pattern is anchored to the end of the growing prefix, so a match
    # means the cleavage site sits right before the current position
    pattern = re.compile(enzyme.expression + '$')

    # collect fully cleaved slices as (from, to, miscleavages)
    cuts = []
    start = 0
    prefix = ''
    for pos, aa in enumerate(sequence):
        prefix += aa
        if not pattern.search(prefix):
            continue

        # skip cleavage sites blocked by modifications
        if not allowMods:
            if sequence.ismodified(pos-1, strict) and not enzyme.modsBefore:
                continue
            if sequence.ismodified(pos, strict) and not enzyme.modsAfter:
                continue

        cuts.append((start, pos, 0))
        start = pos

    # add last peptide
    cuts.append((start, pos+1, 0))

    # join neighbouring slices to get peptides with missed cleavages
    complete = len(cuts)
    for first in range(complete):
        for missed in range(1, miscleavage+1):
            last = first + missed
            if last >= complete:
                break
            cuts.append((cuts[first][0], cuts[last][1], missed))

    # slice peptides from the parent sequence
    peptides = []
    for begin, end, missed in cuts:

        CHECK_FORCE_QUIT()

        peptide = sequence[begin:end]
        peptide.miscleavages = missed

        # enzyme-specific terminal groups apply only to newly created termini
        if begin != 0:
            peptide.nTermFormula = enzyme.nTermFormula
        if end != len(sequence):
            peptide.cTermFormula = enzyme.cTermFormula

        peptides.append(peptide)

    return peptides
# ----
def coverage(ranges, length, human=True):
    """Calculate sequence coverage in percent.
        ranges: (list) covered ranges, each item indexable as (start, stop)
        length: (int) parent sequence length
        human: (bool) ranges in human (True: 1-based, stop included) or
            computer (False: 0-based, stop excluded) indexes
        returns: (float) percentage of positions covered by at least one range;
            0.0 if there are no ranges or the length is not positive
    """

    # check data (a non-positive length has nothing to cover and would
    # otherwise end in a division by zero)
    if not ranges or length <= 0:
        return 0.

    # make a blank sequence
    blank = length*[0]

    # mark covered positions
    for r in ranges:
        start, stop = r[0], r[1]
        if human:
            start -= 1

        # clamp to the sequence bounds: a negative start would wrap around and
        # mark positions at the end, a stop beyond the end would raise IndexError
        start = max(start, 0)
        stop = min(stop, length)

        for x in range(start, stop):
            blank[x] = 1

    # get sequence coverage
    return 100.0*blank.count(1)/length
# ----
# SEQUENCE FRAGMENTATION
# ----------------------
def fragment(sequence, series, scrambling=False):
    """Generate list of neutral peptide fragments from given peptide.
        sequence: (sequence) mspy sequence object
        series: (list) list of fragment serie names - must be defined in blocks.fragments
        scrambling: (bool) allow sequence scrambling
        returns: (list) fragments with duplicates (same serie and same
            indexes) removed
        raises: TypeError for non-sequence input
    """

    # check sequence object
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError("Cannot fragment non-sequence object!")

    # series that are never regenerated while scrambling; this has to be a
    # real tuple - a bare ('M') is just the string 'M' and 'in' would then
    # test for a substring instead of membership
    scramblingFilter = ('M',)

    frags = []

    # generate fragments for linear peptide
    if not sequence.cyclic:
        for serie in series:
            frags += fragmentserie(sequence, serie)

    # generate fragments for cyclic peptide from each linearized form
    else:
        for peptide in sequence.linearized():
            for serie in series:
                frags += fragmentserie(peptide, serie, cyclicParent=True)

    # generate scrambling fragments
    if scrambling:
        buff = []
        for frag in frags:

            # only a, b and M fragments longer than two monomers are scrambled
            if len(frag) <= 2 or frag.fragmentSerie not in ('a', 'b', 'M'):
                continue
            elif frag.fragmentSerie == 'M' and sequence.cyclic:
                continue

            # fragment every linearized form of the fragment
            for peptide in frag.linearized():
                for serie in series:
                    if serie not in scramblingFilter:
                        buff += fragmentserie(peptide, serie, cyclicParent=sequence.cyclic)

        frags += buff

        # remove same fragments
        # NOTE(review): membership is tested on a list, so this is quadratic
        # in the number of fragments; a set of tuples would be faster if
        # indexes() always yields hashable items - confirm before changing
        buff = []
        have = []
        for frag in frags:
            indexes = frag.indexes()

            # molecular ions are the same regardless of the monomer order;
            # sort the indexes only, since sorting them together with the
            # serie name relies on Python 2 ordering of mixed types
            if frag.fragmentSerie == 'M':
                indexes = sorted(indexes)

            frhash = [frag.fragmentSerie] + indexes
            if frhash not in have:
                buff.append(frag)
                have.append(frhash)

        frags = buff

    return frags
# ----
def fragmentserie(sequence, serie, cyclicParent=False):
    """Generate list of neutral peptide fragments of a single serie.
        sequence: (sequence) mspy sequence object (must not be cyclic)
        serie: (str) fragment serie name - must be defined in blocks.fragments
        cyclicParent: (bool) correct fragment termini for a cyclic parent
        returns: (list) generated fragments
        raises: TypeError for non-sequence or cyclic input
    """

    # check sequence object
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError("Cannot fragment non-sequence object!")

    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError('Direct fragmentation of cyclic peptides is not supported!')

    frags = []
    size = len(sequence)

    # get serie definition
    definition = blocks.fragments[serie]
    terminus = definition.terminus

    # molecular ion - the whole sequence
    if terminus == 'M':
        whole = sequence[:]
        whole.fragmentSerie = definition.name
        frags.append(whole)

    # N-terminal fragments - growing prefixes
    elif terminus == 'N':
        for count in range(1, size+1):
            piece = sequence[:count]
            piece.fragmentSerie = definition.name
            piece.fragmentIndex = count
            piece.cTermFormula = definition.cTermFormula
            frags.append(piece)

            CHECK_FORCE_QUIT()

    # C-terminal fragments - growing suffixes
    elif terminus == 'C':
        for count in range(1, size+1):
            piece = sequence[size-count:]
            piece.fragmentSerie = definition.name
            piece.fragmentIndex = count
            piece.nTermFormula = definition.nTermFormula
            frags.append(piece)

            CHECK_FORCE_QUIT()

    # singlet fragments - one monomer each
    elif terminus == 'S':
        for pos in range(size):
            piece = sequence[pos:pos+1]
            piece.fragmentSerie = definition.name
            piece.fragmentIndex = pos+1
            piece.nTermFormula = definition.nTermFormula
            piece.cTermFormula = definition.cTermFormula
            frags.append(piece)

            CHECK_FORCE_QUIT()

    # internal fragments - at least two monomers, touching neither terminus
    elif terminus == 'I':
        for begin in range(1, size-1):
            for span in range(2, size-begin):
                piece = sequence[begin:begin+span]
                piece.fragmentSerie = definition.name
                piece.nTermFormula = definition.nTermFormula
                piece.cTermFormula = definition.cTermFormula
                frags.append(piece)

                CHECK_FORCE_QUIT()

    # correct termini for cyclic peptides
    if cyclicParent:
        if terminus == 'M':
            for frag in frags:
                frag.nTermFormula = ''
                frag.cTermFormula = ''
        elif terminus == 'N':
            for frag in frags:
                frag.nTermFormula = 'H'
        elif terminus == 'C':
            for frag in frags:
                frag.cTermFormula = 'H-1'

    # remove nonsense terminal fragments; the list is re-checked before each
    # deletion because the first one may already have emptied it
    if terminus in ('N', 'S'):
        if frags and definition.nTermFilter:
            del frags[0]
        if frags and definition.cTermFilter:
            del frags[-1]

    # C-terminal fragments are stored from the C-terminus, so the filters
    # hit the opposite ends of the list
    elif terminus == 'C':
        if frags and definition.nTermFilter:
            del frags[-1]
        if frags and definition.cTermFilter:
            del frags[0]

    return frags
# ----
def fragmentlosses(fragments, losses=[], defined=False, limit=1, filterIn={}, filterOut={}):
    """Apply specified neutral losses to fragments.
        fragments: (list) list of sequence fragments
        losses: (list) list of neutral losses
        defined: (bool) use monomer-defined neutral losses
        limit: (int) max length of loss combination
        filterIn: (dic) allowed series for specified losses
        filterOut: (dic) not allowed series for specified losses
        returns: (list) new fragments (duplicates of the given ones) with
            losses applied; the given fragments themselves are not included
    """
    # the mutable defaults are shared between calls; they are only read here

    # make combinations of requested losses up to 'limit' members
    biggest = min(len(losses), limit)
    combinations = [
        list(combo)
        for size in range(1, biggest + 1)
        for combo in itertools.combinations(losses, size)
    ]

    # generate fragments
    results = []
    for frag in fragments:

        CHECK_FORCE_QUIT()

        # get monomer-defined losses to check specificity
        specific = []
        for monomer in frag:
            specific.extend(blocks.monomers[monomer].losses)

        # append new combinations with monomer-defined losses
        candidates = combinations[:]
        if defined:
            for monomer in frag:
                monomerLosses = blocks.monomers[monomer].losses

                # iterate over a snapshot, the candidates list grows meanwhile
                for base in [[]] + candidates[:]:
                    for loss in monomerLosses:
                        single = [loss]
                        if single not in candidates:
                            candidates.append(single)

                        extended = sorted(base + single)
                        if len(extended) <= limit and extended not in candidates:
                            candidates.append(extended)

        # make fragment for each combination
        for combination in candidates:
            variant = frag.duplicate()

            # apply losses one by one, any failed check rejects the variant
            for loss in combination:
                variant.fragmentLosses.append(loss)

                # the same specie cannot be both gained and lost
                if loss in frag.fragmentGains:
                    break

                # check fragment type filters
                if loss in filterOut and frag.fragmentSerie in filterOut[loss]:
                    break
                if loss in filterIn and frag.fragmentSerie not in filterIn[loss]:
                    break

                # check fragment composition
                if not variant.isvalid():
                    break

                # flag losses not defined by any monomer of the fragment
                if loss not in specific:
                    variant.fragmentFiltered = True

            # store fragment if all losses passed
            else:
                results.append(variant)

    return results
# ----
def fragmentgains(fragments, gains=[], filterIn={'H2O':['b'], 'CO':['b', 'c', 'break']}, filterOut={}):
    """Apply specified neutral gains to fragments.
        fragments: (list) list of sequence fragments
        gains: (list) list of neutral gains
        filterIn: (dic) allowed series for specified gains; a 'break' item
            additionally restricts the gain to fragments of a cyclic parent
        filterOut: (dic) not allowed series for specified gains
        returns: (list) new fragments (duplicates of the given ones), one per
            accepted gain; the given fragments themselves are not included
    """
    # the mutable defaults are shared between calls; they are only read here

    # generate fragments
    results = []
    for frag in fragments:

        CHECK_FORCE_QUIT()

        # parent is considered cyclic if any history item contains 'break'
        fromCyclic = any('break' in step for step in frag.history)

        # apply gains
        for gain in gains:

            # the same specie cannot be both lost and gained
            if gain in frag.fragmentLosses:
                continue

            # check fragment type filters
            if gain in filterOut and frag.fragmentSerie in filterOut[gain]:
                continue
            if gain in filterIn:
                allowed = filterIn[gain]
                if frag.fragmentSerie not in allowed:
                    continue

                # check break (cyclic parent)
                if 'break' in allowed and not fromCyclic:
                    continue

            # make fragment
            variant = frag.duplicate()
            variant.fragmentGains.append(gain)

            # store fragment only if its composition is valid
            if variant.isvalid():
                results.append(variant)

    return results
# ----
|