20220901_invertebrate_grouping_script_2

word counting part (not finished yet)

// Reads a list of semicolon-separated records (one per line), counts how often
// each leading field ("word") occurs, and splits the records into two groups:
//   - tmp:      records whose leading word is the most frequent one, with that
//               leading word (and its semicolon) stripped off;
//   - savelist: all other records, unchanged.
// The intermediate results are printed at the end for inspection.
var fs = require("fs");
var data = fs.readFileSync('/System/Volumes/Data/home/2544842260/Public/100sp.txt');
// Accept both "\n" and "\r\n" line endings and drop empty lines. This replaces
// the unconditional pop(), which discarded a real record whenever the file did
// not end with a newline.
var arr = data.toString().split(/\r?\n/).filter(function (line) {
    return line !== "";
});
arr.sort();
// Work on a copy so that arr keeps the full record list (and its length) while
// tmp is narrowed down.
var tmp = arr.slice();

// NOTE(review): wordcounter and wordlist are not used by the code below;
// presumably reserved for the planned outer loop.
var wordcounter = [];
var wordlist = [];
var savelist = [];

/**
 * Return the text before the first semicolon of a record.
 * @param {string} line - one record.
 * @returns {string} the leading word; the whole line when it has no semicolon.
 */
function leadingWord(line) {
    var posiOf1stSemicolon = line.indexOf(";");
    return posiOf1stSemicolon === -1 ? line : line.slice(0, posiOf1stSemicolon);
}

/**
 * Return a record with its leading word and the following semicolon removed.
 * @param {string} line - one record.
 * @returns {string} the rest of the record; "" when it has no semicolon.
 */
function withoutLeadingWord(line) {
    var posiOf1stSemicolon = line.indexOf(";");
    return posiOf1stSemicolon === -1 ? "" : line.slice(posiOf1stSemicolon + 1);
}

//words counting
// wordlisttmp[k] is a distinct leading word, wordcountertmp[k] its frequency.
var wordcountertmp = [];
var wordlisttmp = [];

for (var s = 0; s < tmp.length; s++) {
    var wordtmp = leadingWord(tmp[s]);
    //check if it is a new word. yes - add the new word. no - plus 1
    var existchecker = wordlisttmp.indexOf(wordtmp);
    if (existchecker === -1) {
        wordlisttmp.push(wordtmp);
        wordcountertmp.push(1);
    } else {
        wordcountertmp[existchecker] += 1;
    }
}

//get the most counted word
// Done once after counting. The strict "<" keeps the first word on a tie.
// The defaults make the empty-input case well defined (0, -1, undefined).
var maxTmp = 0;
var indexOfMostCountedWordtmp = -1;
for (var i = 0; i < wordcountertmp.length; i++) {
    if (maxTmp < wordcountertmp[i]) {
        maxTmp = wordcountertmp[i];
        indexOfMostCountedWordtmp = i;
    }
}
var mostCountedWordtmp = wordlisttmp[indexOfMostCountedWordtmp];

//save the less counted species, and remove saved/counted item from tmp
// Build the two groups in new arrays instead of splicing tmp while iterating
// over it, which skipped the record following every removed one. Records are
// matched on their own leading word, not on a substring anywhere in the line.
var remaining = [];
for (var t = 0; t < tmp.length; t++) {
    if (leadingWord(tmp[t]) === mostCountedWordtmp) {
        remaining.push(withoutLeadingWord(tmp[t]));
    } else {
        savelist.push(tmp[t]);
    }
}
tmp = remaining;



console.log(wordcountertmp)
console.log(wordlisttmp)
console.log(tmp)
console.log(maxTmp)
console.log(indexOfMostCountedWordtmp)
console.log(mostCountedWordtmp)
console.log(savelist)

I think the counting code needs to be wrapped in another (outer) loop; I will try that next time.

// Sketch of the planned outer loop; the "..." body is a placeholder and is not
// runnable as written. The body runs once, then repeats while maxTmp is less
// than one eighth of arr.length.
do
{
...
}
while (maxTmp<(arr.length/8));
  • 20220901_invertebrate_grouping_script_2.1662019938.txt.gz
  • 最終更新: 2022/09/01 08:12
  • by 133.11.50.163