/usr/share/hyphy/TemplateBatchFiles/CleanGaps.bf is in hyphy-common 2.2.7+dfsg-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
 * _standardAnalysisBFHelp
 *
 * Returns descriptive metadata about this analysis for the requested topic.
 *
 * _what : a string naming the topic. Recognized values are
 *         "Synopsis", "Input", "Output", "Options", "Further",
 *         "Author", "Version" and "Date".
 *
 * Returns a string for every topic except "Options", which returns an
 * associative array mapping each option name to its description.
 * Any unrecognized topic (and "Further") yields the empty string.
 */
function _standardAnalysisBFHelp (_what)
{
    if (_what == "Synopsis")
    {
        /* The inner quotes must be escaped, otherwise the literal ends at "Filter ". */
        return "Filter \"gappy\" columns in a sequence alignments: i.e. those that contain fewer than a given proportion of sequences with fully or partially resolved characters";
    }
    if (_what == "Input")
    {
        return "A sequence alignment";
    }
    if (_what == "Output")
    {
        return "A sequence alignment with gappy columns stripped out";
    }
    if (_what == "Options")
    {
        /* Option name -> human readable description. */
        _options = {};
        _options ["Filtering threshold"] = "Minimum percent of informative sequences per site to retain the site";
        _options ["Informative characters"] = "Define an informative character as either a fully resolved charatcer (e.g. A) or a partial ambiguity (e.g. R)";
        return _options;
    }
    if (_what == "Further")
    {
        return "";
    }
    if (_what == "Author")
    {
        return "Sergei L Kosakovsky Pond (spond@ucsd.edu)";
    }
    if (_what == "Version")
    {
        return "1.00";
    }
    if (_what == "Date")
    {
        return "20081215";
    }
    /* Unknown topic. */
    return "";
}
/*--------------------------------------------------------------------------*/
/*
 * Interactive driver.
 *
 * Reads an alignment chosen by the user, asks which characters count as
 * "informative" and what proportion of sequences must be informative at a
 * site, marks every unique site pattern that meets the threshold, and
 * writes the alignment restricted to the retained sites to a file chosen
 * by the user.
 */

/* Load shared utilities; prompt_for_a_value (used below) is presumably
   defined there -- TODO confirm. */
ExecuteAFile ("Utility/GrabBag.bf");
SetDialogPrompt ("Please choose a data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
fprintf (stdout, "\nRead an alignment on ", ds.species, " sequences with ", ds.sites, " sites from ", LAST_FILE_PATH);
/* Echo the tree stored in the data file, if there is one. */
if (IS_TREE_PRESENT_IN_DATA)
{
fprintf (stdout, "\nTree In Data:", DATAFILE_TREE);
}
/* Filter built with empty site and sequence partition arguments;
   presumably this selects the whole alignment -- TODO confirm. */
DataSetFilter all = CreateFilter (ds, 1, "", "");
/* Each row is {choice label, choice description}. */
options ={{"Completely resolved", "Only count completely unambiguious characters (e.g. A,C,G,T for nucleotides) as informative"}
{"Partially resolved", "Also count partially resolved characters (e.g. R,Y,M,S etc for nucleotides)"}};
ChoiceList (filteringOption,"Informative characters?",1,SKIP_NONE,options);
/* A negative selection stops the script without producing output. */
if (filteringOption < 0)
{
return 0;
}
fprintf (stdout, "\n");
/* The trailing arguments 0.1,0,1,0 are presumably default, lower bound,
   upper bound and an integer flag -- verify against prompt_for_a_value. */
gating_thresh = prompt_for_a_value ("Retain sites with at least this proportion of informative sites:",0.1,0,1,0);
/* Convert the proportion into a number of sequences. Adding 0.5 before
   "$1" presumably rounds to the nearest integer
   (NOTE(review): confirm that "$" is integer division). */
gating_thresh_seq = (gating_thresh * all.species+0.5)$1;
fprintf (stdout, "Selected informative sites option '", options[filteringOption][0], "' and filtering threshold of '", gating_thresh, "'\n");
/* Associative array keyed by pattern index; an entry is set to 1 for
   every pattern that passes the threshold. */
retainSites = {};
GetDataInfo (charInfo, all, "CHARACTERS");
GetDataInfo (siteToPatternMap, all);
charCount = Columns (charInfo);
/* Presumably a 1 x charCount row vector of ones, so that
   template*thisChar sums the entries of thisChar -- TODO confirm. */
template = {1,charCount}["1"];
/* A sequence is counted as informative at a site when that sum is
   strictly below passcode: 2 for the first choice ("Completely
   resolved"), charCount for the second ("Partially resolved").
   NOTE(review): this presumably relies on thisChar holding one entry per
   character state that the observed symbol is compatible with -- confirm. */
passcode = 2;
if (filteringOption == 1)
{
passcode = charCount;
}
/* Walk the unique site patterns rather than every site. */
for (site = 0; site < all.unique_sites; site = site+1)
{
seq_count = 0;
for (sequence = 0; sequence < all.species; sequence = sequence + 1)
{
GetDataInfo (thisChar, all, sequence, site);
if ((template*thisChar)[0] < passcode)
{
seq_count = seq_count + 1;
/* Stop scanning sequences as soon as the threshold is reached. */
if (seq_count >= gating_thresh_seq)
{
break;
}
}
}
if (seq_count >= gating_thresh_seq)
{
retainSites [site] = 1;
}
SetParameter (STATUS_BAR_STATUS_STRING, "Processing pattern "+(site+1)+"/"+all.unique_sites,0);
}
/* siteIndex is not assigned anywhere in this script; presumably
   CreateFilter supplies it while evaluating the expression for each
   site, and siteToPatternMap translates a site into its pattern index
   so the per-pattern decision can be looked up -- TODO confirm. */
DataSetFilter filtered = CreateFilter (all, 1, retainSites[siteToPatternMap[siteIndex]]);
fprintf (stdout, "\nRetained ", filtered.sites, "/", all.sites, " sites\n");
/* Ask for an output file and write the filtered alignment to it;
   CLEAR_FILE presumably truncates any existing content -- TODO confirm. */
SetDialogPrompt ("Saved the filtered alignment to:");
fprintf (PROMPT_FOR_FILE, CLEAR_FILE, filtered);
|