/usr/bin/bp_process_sgd is in bioperl 1.6.901-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# Shell-compatibility preamble.  Perl parses the next two lines as the single
# statement  eval '...' if 0;  whose condition is always false, so perl itself
# never runs the exec.  The first line is also a valid shell command on its
# own, so if the file is handed to a shell rather than to perl, the shell
# re-launches it under /usr/bin/perl with the original arguments.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
# This script will convert from SGD format to GFF format
# See http://db.yeastgenome.org/schema/Schema.html
use strict;
# Chromosome names, in the order used to translate a 1-based chromosome
# number from the features file into a name (number N is $ROMAN[N-1]).
my @ROMAN = qw(
    I    II   III  IV
    V    VI   VII  VIII
    IX   X    XI   XII
    XIII XIV  XV   XVI
    Mit
);

# Chromosome lengths, keyed by name.  They are hard-coded because they could
# not be obtained directly; the values are listed in the same order as @ROMAN.
my %CHROMOSOMES;
@CHROMOSOMES{@ROMAN} = (
    230_203,      # I
    813_139,      # II
    316_613,      # III
    1_531_929,    # IV
    576_869,      # V
    270_148,      # VI
    1_090_937,    # VII
    562_639,      # VIII
    439_885,      # IX
    745_444,      # X
    666_445,      # XI
    1_078_173,    # XII
    924_430,      # XIII
    784_328,      # XIV
    1_091_284,    # XV
    948_061,      # XVI
    85_779,       # Mit
);
# Die with the usage text when the first command-line argument looks like a
# help switch (-h, --h, -help, --help, ...).
# The @ARGV test comes first so that the pattern is never applied to an
# undefined value when the script is started without arguments and the
# features table arrives on STDIN.
if (@ARGV && $ARGV[0] =~ /^--?h/) {
    die <<USAGE;
Usage: $0 <SGD features file>
This script massages the SGD sequence annotation flat files located at
ftp://genome-ftp.stanford.edu/pub/yeast/data_dump/feature/chromosomal_features.tab
into a version of the GFF format suitable for display by the generic
genome browser.
To use this script, get the SGD chromosomal_features.tab file from the
FTP site listed above, and run the following command:
% process_sgd.pl chromosomal_features.tab > yeast.gff
The yeast.gff file can then be loaded into a Bio::DB::GFF database
using the following command:
% bulk_load_gff.pl -d <databasename> yeast.gff
USAGE
}
# Emit one 'Component' line per chromosome before any feature lines.
# Each line spans 1 .. the hard-coded length, because the features table
# does not carry chromosome lengths.  Names are visited in plain string
# sort order.
foreach my $name (sort keys %CHROMOSOMES) {
    my @columns = (
        $name,                   # reference sequence
        'chromosome',            # source
        'Component',             # feature type
        1,                       # start
        $CHROMOSOMES{$name},     # end
        '.', '.', '.',           # score, strand, phase: not applicable
        qq(Sequence "$name"),    # group
    );
    print join("\t", @columns) . "\n";
}
# Convert each row of the SGD features table into GFF lines.
# This is hard because the SGD idea of a feature doesn't really map onto the
# GFF idea: a row that names a gene produces two lines, a 'gene' line
# carrying the gene name, note and aliases, followed by a line of the row's
# own feature type.
#
# Input columns (tab separated, 0-based): 0 id, 1 gene, 2 aliases
# ('|' separated), 3 type, 4 chromosome number, 5 start, 6 stop, 7 strand,
# 10 description.  Columns 8, 9 and 11 are not used.
#
# Rows that cannot be placed on a known chromosome, or that lack numeric
# coordinates, are reported on STDERR and skipped rather than being written
# out under a wrong or empty reference name.
while (my $line = <>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines

    my ($id, $gene, $alias_field, $type, $chromosome,
        $start, $stop, $strand, $description)
        = (split /\t/, $line)[0 .. 7, 10];

    # The chromosome must be an index into @ROMAN.  Without this check an
    # empty value would select $ROMAN[-1] and a value such as "2-micron"
    # would be read as the number 2.
    my $chrom_ok = defined($chromosome)
                && $chromosome =~ /\A\d+\z/
                && $chromosome >= 1
                && $chromosome <= @ROMAN;
    my $coords_ok = defined($start) && $start =~ /\A\d+\z/
                 && defined($stop)  && $stop  =~ /\A\d+\z/;
    if (!$chrom_ok || !$coords_ok) {
        warn "$0: skipping input line $.: ",
             ($chrom_ok ? "non-numeric start/stop" : "unrecognized chromosome"),
             "\n";
        next;
    }
    my $ref = $ROMAN[$chromosome - 1];

    # Escape the characters that delimit values in the GFF group column.
    $description = '' unless defined $description;
    $description =~ s/([";])/\\$1/g;

    # 'W' maps to '+'; an absent strand maps to '.' (unstranded); any other
    # value is treated as the reverse strand, as before.
    my $gff_strand = (!defined($strand) || $strand eq '') ? '.'
                   : $strand eq 'W'                       ? '+'
                   :                                        '-';

    # Reverse-strand rows list their coordinates high-to-low, so swap them.
    # Unstranded rows are simply put into ascending order.
    ($start, $stop) = ($stop, $start)
        if $gff_strand eq '-' || ($gff_strand eq '.' && $stop < $start);
    die "Strand logic is messed up" if $stop < $start;

    if ($gene) {
        my @aliases     = split /\|/, $alias_field;
        my $alias_attrs = join " ; ", map { qq(Alias "$_") } @aliases;
        my $group       = qq(Gene "$gene" ; Note "$description");
        $group .= " ; $alias_attrs" if $alias_attrs;
        print join("\t", $ref, 'sgd', 'gene', $start, $stop, '.', $gff_strand, '.', $group), "\n";

        # The aliases are also appended to the note of the typed line below.
        $description .= "\\; AKA @aliases" if @aliases;
    }

    print join("\t", $ref, 'sgd', $type, $start, $stop, '.', $gff_strand, '.',
               qq($type "$id" ; Note "$description")), "\n";
}
__END__
=head1 NAME
process_sgd.pl - Massage SGD annotation flat files into a version suitable for the Generic Genome Browser
=head1 SYNOPSIS
% process_sgd.pl chromosomal_features.tab > yeast.gff
=head1 DESCRIPTION
This script massages the SGD sequence annotation flat files located at
ftp://genome-ftp.stanford.edu/pub/yeast/data_dump/feature/chromosomal_features.tab
into a version of the GFF format suitable for display by the generic
genome browser.
To use this script, get the SGD chromosomal_features.tab file from the
FTP site listed above, and run the following command:
% process_sgd.pl chromosomal_features.tab > yeast.gff
The yeast.gff file can then be loaded into a Bio::DB::GFF database
using the following command:
% bulk_load_gff.pl -d <databasename> yeast.gff
=head1 SEE ALSO
L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
=head1 AUTHOR
Lincoln Stein, lstein@cshl.org
Copyright (c) 2002 Cold Spring Harbor Laboratory
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.
=cut
|