20220928_invertebrate_grouping_script_finished

//node grouping.js inputFilePath
//read species file
var fs = require("fs");
var arg=process.argv.splice(2);
file=arg[0]
//var data = fs.readFileSync('/System/Volumes/Data/home/2544842260/Public/100again.txt');
var data = fs.readFileSync(file);

//change the data to 1D-array
var arr = data.toString().trim().split("\n");

arr.sort();
var processList=[]
var speciesNumber=arr.length
var saveList=[]

//change the data to 2D-array
for (var species=0; species<speciesNumber; species++) {
    processList[species]=arr[species].split(";")
}

var countNumber=speciesNumber

//COUNTING the words, and separate the list
var round=0
do{
    var counttmp=[]
    var wordListtmp=[]
    for (var s=0; s<processList.length;s++){
        var existchecker="nonexist"
        for (i=0;i<wordListtmp.length;i++){
            if (processList[s][round]===wordListtmp[i]){
                existchecker="exist"
                var posilog=i
            }
        }
        if (existchecker=="exist"){
            counttmp[posilog]=counttmp[posilog]+1
        }
        else{
            wordListtmp.push(processList[s][round])
            counttmp.push(1)
        }
    }
    //get nost counted word
    var countNumbertmp=0
    for (var j=0; j<wordListtmp.length;j++){
        if(countNumbertmp<counttmp[j]){
            countNumbertmp=counttmp[j]
            var indexOfCountNumber=j
        }
        var mostCountedWord=wordListtmp[indexOfCountNumber]
    }
    //separate the species list if need
    if (countNumbertmp!==countNumber){
        //separate the list to save list and process list
        var savelisttmp=[]
        var processListtmp=[]
        processNumber=processList.length
        for (var k=0;k<processNumber;k++){
            if (mostCountedWord!==processList[k][round]){
                savelisttmp.push(processList[k])
            }
            else{
                processListtmp.push(processList[k])
            }
        }
        if (savelisttmp.length!==0){
            saveList.push(savelisttmp)
        }
        processList=processListtmp
    }
    countNumber=countNumbertmp
    var time=round+1
    //console.log("round "+time)
    //console.log(counttmp)
    //console.log(countNumber)
    //console.log(mostCountedWord)
    //console.log(saveList.length)
    //console.log(processList.length)
    //console.log(" ")
round=round+1
}
while (countNumber>(speciesNumber/8))


//get final separate list and counts
var separatedList=[]
separatedList.push(processList)
for (var q=0; q<saveList.length; q++){
    separatedList.push(saveList[q])
}

var separatedCountList=[]
for (var t=0; t<separatedList.length;t++){
    separatedCountList[t]=separatedList[t].length
}




//GROUPING
var operationseparatedCountList=separatedCountList
var operationseparatedList=separatedList
do{
    var groupinglisttmp=[]
    console.log("-------round-------")
    var groupingtmp=[]
    console.log("------"+groupinglisttmp.length)
    var d=speciesNumber
    var positionsaver=0
    for(var i=0;i<operationseparatedCountList.length;i++){
        var a=operationseparatedCountList[i]
        var b=operationseparatedCountList[i+1]
        var c=a+b
        if (c<d){
            d=c
            positionsaver=i
        }
    }
    console.log("----positionsaver----")
    console.log(positionsaver)
    console.log("----species number----")
    for (var j=0;j<positionsaver;j++){
        groupinglisttmp.push(operationseparatedList[j])
        console.log(operationseparatedList[j].length)
        console.log("------"+groupinglisttmp.length)
    }
    for (var s=0;s<operationseparatedList[positionsaver].length;s++){
        groupingtmp.push(operationseparatedList[positionsaver][s])
    }
    for (var s=0;s<operationseparatedList[positionsaver+1].length;s++){
        groupingtmp.push(operationseparatedList[positionsaver+1][s])
    }
    groupinglisttmp.push(groupingtmp)
    console.log("----combine----")
    console.log(groupingtmp.length)
    console.log("------"+groupinglisttmp.length)
    for (var j=positionsaver+2;j<operationseparatedCountList.length;j++){
        groupinglisttmp.push(operationseparatedList[j])
        console.log(operationseparatedList[j].length)
        console.log("------"+groupinglisttmp.length)
    }

    if (groupinglisttmp[-1]=undefined){
        groupinglisttmp.pop()
    }
    operationseparatedCountList=[]
    for (var t=0; t<groupinglisttmp.length;t++){
        operationseparatedCountList[t]=groupinglisttmp[t].length
    }
    console.log("----count now----")
    console.log(groupinglisttmp.length)
    console.log(operationseparatedCountList.length)
    console.log(operationseparatedCountList)
    operationseparatedList=groupinglisttmp

}
while(operationseparatedCountList.length>8)



console.log("----------RESULT----------")
console.log("-------counting result-------")
console.log(separatedCountList)

var groupingresult=operationseparatedList
console.log("-------grouping result-------")
for(var i=0;i<groupingresult.length;i++){
    console.log(groupingresult[i].length)
}
console.log(groupingresult.length)

// output file
var str=""
var strSingleLine=""
for (var i=0;i<groupingresult.length;i++){
    var colormark=i+1
    for (var j=0;j<groupingresult[i].length;j++){
        strSingleLine=groupingresult[i][j].toString();
        strSingleLine=strSingleLine+'\t'+colormark+'\n'
        str=str+strSingleLine
    }
}
str=str.replace(/,/g,";")
fs.writeFile("classifylist.txt",str,function(err){
    if(err){
        return console.log(err)
    }
})

//find commonness
var commonness=""
for (var g=0;g<groupingresult.length;g++){
    var commonnessWordPosi=0
    var commonnessWordCount=0
    var commonnessWordTemp=""
    console.log("--------"+"group"+(g+1))
    var commonnessStrTemp=""
    var commonnessTemp=""
    do{
        commonnessWordCount=0
        commonnessWordTemp=groupingresult[g][0][commonnessWordPosi]
        console.log(commonnessWordTemp)
        if (commonnessStrTemp==""){
            commonnessStrTemp=commonnessWordTemp
        }
        else{
            commonnessStrTemp=commonnessStrTemp+";"+commonnessWordTemp
        }

        for (var species=0;species<groupingresult[g].length;species++){
            if (groupingresult[g][species][commonnessWordPosi]==commonnessWordTemp){
                commonnessWordCount=commonnessWordCount+1
            }
        }
        console.log(commonnessWordCount)
        commonnessWordPosi=commonnessWordPosi+1
        if (commonnessWordCount!=groupingresult[g].length){
            console.log(commonnessStrTemp)
            //the last word is not common word, so remove it.
            commonnessTemp=commonnessStrTemp.substring(0,commonnessStrTemp.lastIndexOf(";"))
            console.log(commonnessTemp)
            commonness=commonness+(g+1)+'\t'+commonnessTemp+'\n'
        }
        
    }
    while (commonnessWordCount==groupingresult[g].length)
}
fs.writeFile("commonness.txt",commonness,function(err){
    if(err){
        return console.log(err)
    }
})
  • 20220928_invertebrate_grouping_script_finished.1664346769.txt.gz
  • 最終更新: 2022/09/28 06:32
  • by 133.11.50.163