/usr/share/EMBOSS/acd/domainnr.acd is in embassy-domainatrix 0.1.660-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# ACD definition for the domainnr application, which belongs to the
# domainatrix EMBASSY package. This block carries the application-level
# metadata: one-line description, menu groups, GUI and batch suitability,
# expected CPU load, and the EDAM topic and operation annotations.
application: domainnr [
documentation: "Remove redundant domains from a DCF file."
groups: "Utils:Database creation, Protein:Domains"
gui: "yes"
batch: "yes"
cpu: "medium"
embassy: "domainatrix"
relations: "EDAM_topic:0091 Data handling"
relations: "EDAM_topic:0736 Protein domains and folds"
relations: "EDAM_topic:0182 Sequence alignment"
relations: "EDAM_operation:0290 Sequence redundancy removal"
]
# Input section. It declares the DCF file to process, the substitution
# matrix used for sequence comparison, and the toggle that decides
# whether redundant domains are written to a separate file.
section: input [
information: "Input section"
type: "page"
]
# Input domain classification file, marked as a parameter.
infile: dcfinfile [
parameter: "Y"
information: "Domain classification file"
help: "This option specifies name of DCF file (domain
classification file) (input). A 'domain classification file'
contains classification and other data for domains from SCOP or
CATH, in DCF format (EMBL-like). The files are generated by using
SCOPPARSE and CATHPARSE. Domain sequence information can be added
to the file by using DOMAINSEQS."
knowntype: "Domain classification"
relations: "EDAM_data:0900 Protein domain classification"
]
# Residue substitution matrix, an additional qualifier that defaults
# to EBLOSUM62.
matrixf: datafile [
additional: "Y"
information: "Residue substitution matrix."
help: "This option specifies the residue substitution matrix. This
is used for sequence comparison."
default: "EBLOSUM62"
relations: "EDAM_data:1449 Comparison matrix (amino acid)"
]
# Toggle, off by default. Its value is referenced by the redoutfile
# definition in the output section.
toggle: retain [
standard: "Y"
information: "Write redundant domains to separate file."
help: "This option specifies whether to write redundant domains to
a separate file. If this option is selected, redundant domains
are written to a separate output file."
default: "N"
relations: "EDAM_data:2527 Parameter"
]
endsection: input
# Required section. It declares the hierarchy node at which redundancy
# is removed, the removal mode, and the similarity thresholds that
# apply to the selected mode.
section: required [
information: "Required section"
type: "page"
]
# Classification level for redundancy removal. Exactly one of the nine
# coded choices is selected (minimum and maximum are both 1) and the
# default is code 1. Choices are separated by a comma and each code is
# separated from its label by a colon.
list: node [
standard: "Y"
default: "1"
minimum: "1"
maximum: "1"
values: "1: Class (SCOP), 2: Fold (SCOP), 3: Superfamily (SCOP),
4: Family (SCOP), 5: Class (CATH), 6: Architecture (CATH), 7:
Topology (CATH), 8: Homologous Superfamily (CATH), 9: Family
(CATH)"
help: "This option specifies the node for redundancy removal.
Redundancy can be removed at any specified node in the SCOP or
CATH hierarchies. For example by selecting 'Class' entries
belonging to the same Class will be non-redundant."
delimiter: ","
codedelimiter: ":"
header: "Node at which to remove redundancy"
information: "Select number."
relations: "EDAM_data:2527 Parameter"
]
# Removal mode, exactly one of two coded choices. Code 1 enables the
# single threshold qualifier, code 2 enables the threshlow and threshup
# qualifiers (see their standard expressions below).
list: mode [
standard: "Y"
default: "1"
minimum: "1"
maximum: "1"
values: "1: Remove redundancy at a single threshold % sequence
similarity,2: Remove redundancy outside a range of acceptable
threshold % similarity"
help: "This option specifies whether to remove redundancy at a
single threshold % sequence similarity or remove redundancy
outside a range of acceptable threshold % similarity. All
permutations of pair-wise sequence alignments are calculated for
each domain family in turn using the EMBOSS implementation of the
Needleman and Wunsch global alignment algorithm. Redundant
sequences are removed in one of two modes as follows: (i) If a
pair of proteins achieve greater than a threshold percentage
sequence similarity (specified by the user) the shortest sequence
is discarded. (ii) If a pair of proteins have a percentage
sequence similarity that lies outside an acceptable range
(specified by the user) the shortest sequence is discarded."
delimiter: ","
codedelimiter: ":"
header: "Redundancy removal options"
information: "Select number."
relations: "EDAM_data:2527 Parameter"
]
# Single threshold, a standard qualifier only when mode is 1. The value
# is a percentage, so it is bounded to the range 0 to 100; without the
# bounds a negative or over-100 value would be accepted silently.
float: threshold [
standard: "@($(mode)==1)"
information: "The % sequence identity redundancy
threshold."
help: "This option specifies the % sequence identity redundancy
threshold, which determines the redundancy calculation. If a pair
of proteins achieve greater than this threshold the shortest
sequence is discarded."
default: "95.0"
minimum: "0.0"
maximum: "100.0"
relations: "EDAM_data:2146 Threshold"
]
# Lower limit of the acceptable range, a standard qualifier only when
# mode is 2. Bounded to the range 0 to 100 because it is a percentage.
float: threshlow [
standard: "@($(mode)==2)"
information: "The % sequence identity redundancy threshold
(lower limit)"
help: "This option specifies the % sequence identity redundancy
threshold, which determines the redundancy calculation. If a pair
of proteins have a percentage sequence similarity that lies
outside an acceptable range the shortest sequence is discarded."
default: "30.0"
minimum: "0.0"
maximum: "100.0"
relations: "EDAM_data:2146 Threshold"
]
# Upper limit of the acceptable range, a standard qualifier only when
# mode is 2. Bounded to the range 0 to 100 because it is a percentage.
float: threshup [
standard: "@($(mode)==2)"
information: "The % sequence identity redundancy threshold
(upper limit)."
help: "This option specifies the % sequence identity redundancy
threshold, which determines the redundancy calculation. If a pair
of proteins have a percentage sequence similarity that lies
outside an acceptable range the shortest sequence is discarded."
default: "90.0"
minimum: "0.0"
maximum: "100.0"
relations: "EDAM_data:2146 Threshold"
]
endsection: required
# Additional section. It declares the two gap penalties used for the
# sequence alignment; both are additional qualifiers.
section: additional [
information: "Additional section"
type: "page"
]
# Gap insertion penalty. Accepts a float from 1 to 100, default 10.
float: gapopen [
additional: "Y"
information: "Gap insertion penalty"
minimum: "1."
maximum: "100."
default: "10"
valid: "Floating point number from 1.0 to 100.0"
expected: "10.0 for any sequence"
help: "This option specifies the gap insertion penalty. This is
the score taken away when a gap is created. The best value depends
on the choice of comparison matrix. The default value assumes you
are using the EBLOSUM62 matrix for protein sequences, and the
EDNAFULL matrix for nucleotide sequences."
relations: "EDAM_data:1397 Gap opening penalty"
]
# Gap extension penalty. Accepts a float from 0 to 10, default 0.5.
float: gapextend [
additional: "Y"
information: "Gap extension penalty"
minimum: "0.0"
maximum: "10."
default: "0.5"
valid: "Floating point number from 0.0 to 10.0"
expected: "0.5 for any sequence"
help: "This option specifies the gap extension penalty. This is
added to the standard gap penalty for each base or residue in the
gap. This is how long gaps are penalized. Usually you will expect
a few long gaps rather than many short gaps, so the gap extension
penalty should be lower than the gap penalty."
relations: "EDAM_data:1398 Gap extension penalty"
]
endsection: additional
# Advanced section. It is declared but empty; no qualifiers are
# defined between the section header and its endsection.
section: advanced [
information: "Advanced section"
type: "page"
]
endsection: advanced
# Output section. It declares the non-redundant DCF output file, the
# optional file for redundant domains, and the log file.
section: output [
information: "Output section"
type: "page"
]
# Non-redundant domain classification output, marked as a parameter.
# The default file name is test.scop.
outfile: dcfoutfile [
parameter: "Y"
information: "Domain classification output file"
help: "This option specifies the name of non-redundant DCF file
(domain classification file) (output). A 'domain classification
file' contains classification and other data for domains from SCOP
or CATH, in DCF format (EMBL-like). The files are generated by
using SCOPPARSE and CATHPARSE. Domain sequence information can be
added to the file by using DOMAINSEQS."
default: "test.scop"
knowntype: "Domain classification"
relations: "EDAM_data:0900 Protein domain classification"
]
# Output for the redundant domains. It is a standard qualifier only
# when the retain toggle is set; nullok and nulldefault are both Y, and
# the file extension is scop.
outfile: redoutfile [
standard: "$(retain)"
information: "Domain classification redundant output file
(optional)"
help: "This option specifies the name of DCF file (domain
classification file) for redundant sequences (output). A 'domain
classification file' contains classification and other data for
domains from SCOP or CATH, in DCF format (EMBL-like). The files
are generated by using SCOPPARSE and CATHPARSE. Domain sequence
information can be added to the file by using DOMAINSEQS."
nullok: "Y"
nulldefault: "Y"
extension: "scop"
knowntype: "Domain classification"
relations: "EDAM_data:0900 Protein domain classification"
]
# Log file for messages about errors raised during the run. The
# default file name is domainnr.log.
outfile: logfile [
standard: "Y"
information: "Domainatrix log output file"
help: "This option specifies the name of log file for the build.
The log file contains messages about any errors arising while
domainnr ran."
default: "domainnr.log"
knowntype: "domainatrix log"
relations: "EDAM_data:1678 Tool log"
]
endsection: output