/usr/share/slib/ncbi-dna.scm is in slib 3b1-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
;;;; "ncbi-dna.scm" Read and manipulate NCBI-format nucleotide sequences
;;; Copyright (C) 2003 Aubrey Jaffer
;
;Permission to copy this software, to modify it, to redistribute it,
;to distribute modified versions, and to use it for any purpose is
;granted, subject to the following restrictions and understandings.
;
;1. Any copy made of this software must include this copyright notice
;in full.
;
;2. I have made no warranty or representation that the operation of
;this software will be error-free, and I am under no obligation to
;provide any services, by way of maintenance, update, or otherwise.
;
;3. In conjunction with products arising from the use of this
;material, there shall be no use of my name in any advertising,
;promotional, or sales literature without prior written consent in
;each case.
(require 'array)
(require 'scanf)
(require 'string-case)
(require 'string-search)
(require 'array-for-each)
(require-if 'compiling 'printf) ;used by cDNA:report-base-count
;;@code{(require 'ncbi-dma)}
;;@ftindex ncbi-dma
(define (ncbi:read-DNA-line port)
  ;; Read one numbered sequence line from PORT: an integer index followed
  ;; by up to six runs of the letters a, c, g, t.
  ;; Returns the runs concatenated into one string, or #f when nothing
  ;; could be scanned (empty result or end of file) or when the index is
  ;; not congruent to 1 modulo 60 (a warning is issued in that case).
  (let ((fields (scanf-read-list
                 " %d %[acgt] %[acgt] %[acgt] %[acgt] %[acgt] %[acgt]" port)))
    (cond ((or (null? fields) (eof-object? fields)) #f)
          ;; Line indices are expected to be 1, 61, 121, ...
          ((eqv? 1 (modulo (car fields) 60))
           (apply string-append (cdr fields)))
          (else (slib:warn 'bad 'idx fields) #f))))
;;@body
;;Reads the NCBI-format DNA sequence following the word @samp{ORIGIN}
;;from @1.
(define (ncbi:read-DNA-sequence port)
  ;; Skip past the word "ORIGIN" and then past the next newline, so that
  ;; reading resumes at the first numbered sequence line.
  (find-string-from-port? "ORIGIN" port)
  (find-string-from-port? (string #\newline) port)
  ;; Gather sequence lines (most recent first) until ncbi:read-DNA-line
  ;; returns #f, then join them in reading order into a single string.
  (let collect ((line (ncbi:read-DNA-line port))
                (gathered '()))
    (if line
        (collect (ncbi:read-DNA-line port) (cons line gathered))
        (apply string-append (reverse gathered)))))
;;@body
;;Reads the NCBI-format DNA sequence following the word @samp{ORIGIN}
;;from the file named @1.
(define (ncbi:read-file file)
  ;; Open FILE for input, hand the port to ncbi:read-DNA-sequence, and
  ;; return the sequence string it produces.
  (call-with-input-file file
    (lambda (port)
      (ncbi:read-DNA-sequence port))))
;;@body
;;Returns a newly allocated copy of @1 in which each @samp{t} is
;;replaced by @samp{u} and each @samp{T} by @samp{U}.  All other
;;characters are copied unchanged and @1 itself is not modified.
(define (mRNA<-cDNA str)
  ;; The previous implementation mapped the substitution with
  ;; array-for-each, which discards the procedure's results, so nothing
  ;; was replaced and no useful value was returned.  Build the result
  ;; string explicitly instead.
  (let* ((len (string-length str))
         (rna (make-string len)))
    (do ((idx 0 (+ 1 idx)))
        ((>= idx len) rna)
      (let ((chr (string-ref str idx)))
        (string-set! rna idx
                     (case chr
                       ((#\t) #\u)
                       ((#\T) #\U)
                       (else chr)))))))
;; Codon table in the cDNA alphabet (T rather than U), stored as an
;; association list keyed by the three-letter codon symbol.
;; An entry for a coding codon has the form
;;   (codon three-letter-amino-acid-symbol one-letter-amino-acid-char)
;; The stop codons TAA, TGA and TAG have a key only, so (cdr entry) is
;; the empty list for them.
;; Consulted by codons<-cDNA, protein<-cDNA and P<-cDNA.
(define cDNA:codons
'((TTT phe #\F) (TCT ser #\S) (TAT tyr #\Y) (TGT cys #\C)
(TTC phe #\F) (TCC ser #\S) (TAC tyr #\Y) (TGC cys #\C)
(TTA leu #\L) (TCA ser #\S) (TAA) (TGA) ;stops
(TTG leu #\L) (TCG ser #\S) (TAG) (TGG trp #\W)
(CTT leu #\L) (CCT pro #\P) (CAT his #\H) (CGT arg #\R)
(CTC leu #\L) (CCC pro #\P) (CAC his #\H) (CGC arg #\R)
(CTA leu #\L) (CCA pro #\P) (CAA gln #\Q) (CGA arg #\R)
(CTG leu #\L) (CCG pro #\P) (CAG gln #\Q) (CGG arg #\R)
(ATT ile #\I) (ACT thr #\T) (AAT asn #\N) (AGT ser #\S)
(ATC ile #\I) (ACC thr #\T) (AAC asn #\N) (AGC ser #\S)
(ATA ile #\I) (ACA thr #\T) (AAA lys #\K) (AGA arg #\R)
(ATG met #\M) (ACG thr #\T) (AAG lys #\K) (AGG arg #\R)
(GTT val #\V) (GCT ala #\A) (GAT asp #\D) (GGT gly #\G)
(GTC val #\V) (GCC ala #\A) (GAC asp #\D) (GGC gly #\G)
(GTA val #\V) (GCA ala #\A) (GAA glu #\E) (GGA gly #\G)
(GTG val #\V) (GCG ala #\A) (GAG glu #\E) (GGG gly #\G)))
;;@body
;;Returns a list of three-letter symbol codons comprising the protein
;;sequence encoded by @1 starting with its first occurrence of
;;@samp{atg}.
(define (codons<-cDNA cDNA)
  ;; Translate the string cDNA into a list of codon symbols bracketed by
  ;; the markers *N* (first element) and *C* (last element).
  ;; Scanning starts at the first case-insensitive match of "atg", or at
  ;; index 0 (after a warning) when there is none.  It stops at the first
  ;; stop codon (a cDNA:codons entry with no amino acid) or at the first
  ;; triplet absent from cDNA:codons (after a warning).
  ;; Signals an error when fewer than three bases remain before either
  ;; of those is seen.
  (define (lookup triplet)
    ;; Return the cDNA:codons entry for the string TRIPLET, or #f.
    ;; Print-names are compared with string-ci=? so the result does not
    ;; depend on whether the reader preserved or folded the case of the
    ;; table's symbols, nor on the case string-ci->symbol produces.
    (let scan ((entries cDNA:codons))
      (cond ((null? entries) #f)
            ((string-ci=? triplet (symbol->string (car (car entries))))
             (car entries))
            (else (scan (cdr entries))))))
  (let ((len (string-length cDNA))
        (start (substring-ci? "atg" cDNA)))
    (if (not start) (slib:warn 'missed 'start))
    (let loop ((protein '(*N*))
               (cdx (or start 0)))
      ;; A whole triplet must remain; checking only (<= len cdx) let
      ;; substring run past the end when 1 or 2 bases were left over.
      (if (< len (+ 3 cdx)) (slib:error 'reached 'end cdx))
      (let* ((triplet (substring cDNA cdx (+ 3 cdx)))
             (codon (string-ci->symbol triplet))
             (asc (lookup triplet)))
        (cond ((not asc)
               (slib:warn 'mystery 'codon codon)
               (reverse (cons '*C* protein)))
              ((null? (cdr asc)) (reverse (cons '*C* protein)))
              (else (loop (cons codon protein) (+ 3 cdx))))))))
;;@body
;;Returns a list of three-letter symbols for the protein sequence
;;encoded by @1 starting with its first occurrence of @samp{atg}.
(define (protein<-cDNA cDNA)
  ;; Translate the string cDNA into a list of three-letter amino-acid
  ;; symbols bracketed by the markers *N* (first element) and *C* (last
  ;; element).
  ;; Scanning starts at the first case-insensitive match of "atg", or at
  ;; index 0 (after a warning) when there is none.  It stops at the first
  ;; stop codon (a cDNA:codons entry with no amino acid) or at the first
  ;; triplet absent from cDNA:codons (after a warning).
  ;; Signals an error when fewer than three bases remain before either
  ;; of those is seen.
  (define (lookup triplet)
    ;; Return the cDNA:codons entry for the string TRIPLET, or #f.
    ;; Print-names are compared with string-ci=? so the result does not
    ;; depend on whether the reader preserved or folded the case of the
    ;; table's symbols, nor on the case string-ci->symbol produces.
    (let scan ((entries cDNA:codons))
      (cond ((null? entries) #f)
            ((string-ci=? triplet (symbol->string (car (car entries))))
             (car entries))
            (else (scan (cdr entries))))))
  (let ((len (string-length cDNA))
        (start (substring-ci? "atg" cDNA)))
    (if (not start) (slib:warn 'missed 'start))
    (let loop ((protein '(*N*))
               (cdx (or start 0)))
      ;; A whole triplet must remain; checking only (<= len cdx) let
      ;; substring run past the end when 1 or 2 bases were left over.
      (if (< len (+ 3 cdx)) (slib:error 'reached 'end cdx))
      (let* ((triplet (substring cDNA cdx (+ 3 cdx)))
             (codon (string-ci->symbol triplet))
             (asc (lookup triplet)))
        (cond ((not asc)
               (slib:warn 'mystery 'codon codon)
               (reverse (cons '*C* protein)))
              ((null? (cdr asc)) (reverse (cons '*C* protein)))
              ;; (cadr asc) is the three-letter amino-acid symbol.
              (else (loop (cons (cadr asc) protein) (+ 3 cdx))))))))
;;@body
;;Returns a string of one-letter amino acid codes for the protein
;;sequence encoded by @1 starting with its first occurrence of
;;@samp{atg}.
(define (P<-cDNA cDNA)
  ;; Translate the string cDNA into a string of one-letter amino-acid
  ;; codes.
  ;; Scanning starts at the first case-insensitive match of "atg", or at
  ;; index 0 (after a warning) when there is none, and stops at the first
  ;; stop codon (a cDNA:codons entry with no amino acid).
  ;; Signals an error for a triplet absent from cDNA:codons, and when
  ;; fewer than three bases remain before a stop codon is seen.
  (define (lookup triplet)
    ;; Return the cDNA:codons entry for the string TRIPLET, or #f.
    ;; Print-names are compared with string-ci=? so the result does not
    ;; depend on whether the reader preserved or folded the case of the
    ;; table's symbols, nor on the case string-ci->symbol produces.
    (let scan ((entries cDNA:codons))
      (cond ((null? entries) #f)
            ((string-ci=? triplet (symbol->string (car (car entries))))
             (car entries))
            (else (scan (cdr entries))))))
  (let ((len (string-length cDNA))
        (start (substring-ci? "atg" cDNA)))
    (if (not start) (slib:warn 'missed 'start))
    (let loop ((protein '())
               (cdx (or start 0)))
      ;; A whole triplet must remain; checking only (<= len cdx) let
      ;; substring run past the end when 1 or 2 bases were left over.
      (if (< len (+ 3 cdx)) (slib:error 'reached 'end cdx))
      (let* ((triplet (substring cDNA cdx (+ 3 cdx)))
             (codon (string-ci->symbol triplet))
             (asc (lookup triplet)))
        (cond ((not asc) (slib:error 'mystery 'codon codon))
              ((null? (cdr asc)) (list->string (reverse protein)))
              ;; (caddr asc) is the one-letter amino-acid character.
              (else (loop (cons (caddr asc) protein) (+ 3 cdx))))))))
;;@
;;These cDNA count routines provide a means to check the nucleotide
;;sequence with the @samp{BASE COUNT} line preceding the sequence from
;;NCBI.
;;@body
;;Returns a list of counts of @samp{a}, @samp{c}, @samp{g}, and
;;@samp{t} occurring in @1.
(define (cDNA:base-count cDNA)
  ;; Tally the bases in cDNA, treating upper and lower case alike.
  ;; Returns a four-element list of counts in the order a, c, g, t.
  ;; Signals an error on any element that is not one of those bases.
  (let ((a-total 0)
        (c-total 0)
        (g-total 0)
        (t-total 0))
    (array-for-each
     (lambda (base)
       (cond ((memv base '(#\a #\A)) (set! a-total (+ 1 a-total)))
             ((memv base '(#\c #\C)) (set! c-total (+ 1 c-total)))
             ((memv base '(#\g #\G)) (set! g-total (+ 1 g-total)))
             ((memv base '(#\t #\T)) (set! t-total (+ 1 t-total)))
             (else (slib:error 'cDNA:base-count 'unknown 'base base))))
     cDNA)
    (list a-total c-total g-total t-total)))
;;@body
;;Prints the counts of @samp{a}, @samp{c}, @samp{g}, and @samp{t}
;;occurring in @1.
(define (cDNA:report-base-count cDNA)
  ;; Print the a, c, g and t counts of cDNA on one "BASE COUNT" line.
  ;; printf is required here, at call time, before it is used.
  (require 'printf)
  (let ((counts (cDNA:base-count cDNA)))
    ;; The format string ends in a backslash followed by n, which is
    ;; passed to printf as written.
    (apply printf "BASE COUNT %6d a %6d c %6d g %6d t\\n" counts)))
|