Analyzing SRA data with SILVA

# After the public-data analysis finishes (2024)

```
#cd /home/yoshitake.kazutoshi/work3/metasearch/2023/db4-2-merge
for i in `ls -d ../db4/[1-9]*|grep -v "[.]tar"`;do echo $i; j=`basename $i`; mkdir $j; cp $i/*.result $j;  done
for i in `ls -d ../db4-2/[1-9]*|grep -v "[.]tar"`;do echo $i; j=`basename $i`; cp $i/*.result $j;  done

mkdir merge
for j in [1-9]*; do echo $j; if [ `find $j|grep result$|wc -l` != 0 ];then awk -F'\t' 'FNR==1{split(FILENAME,arr,"/"); split(arr[2],arr2,"."); print "id\t"arr2[1]} {print $0}' `find $j|grep result$` > merge/$j.db.tsv; fi; done
#remove files of size 0
find merge/ -size 0|xargs rm

#merge with the previous results
mkdir merge2
find ~/work3/metasearch/2023/db3-3-merge/merge/|grep tsv$|while read i; do ln -s $i merge2/db3-`basename $i`; done
find ~/work3/metasearch/2023/db4-2-merge/merge/|grep tsv$|while read i; do ln -s $i merge2/db4-`basename $i`; done

~/oldhome/work2/calccor/calccor test1 merge2/
```

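As an optional sanity check, the number of samples that landed in each merged table can be counted from the `id<TAB>sample` header lines written by the awk command above (a minimal sketch):

```
# Count how many samples each merged table contains, using its "id<TAB>sample" header lines.
for f in merge/*.db.tsv; do
  printf '%s\t%d\n' "$f" "$(grep -c $'^id\t' "$f")"
done
```
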
If you want to split the merged results back out again (for the metasearch DB):

```
#m128m /suikou/download9-v251/metasearch/metasearch_dev/metasearch/data/db
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".input"} {print $0 > filename}' ../db_merge/$i ; done
```

Creating an analysis result for testing

```
cd /home/yoshitake.kazutoshi/work3/metasearch/2023/db3-3-merge/test
srr=SRR19572742
#bash run-download.sh $srr
bash ~/work3/metasearch/2023/db4-2/run-blast3-4.sh $srr
(echo -e "id\t$srr"; cat $srr.result) > $srr.input
~/oldhome/work2/calccor/calccor $srr.input ~/work3/metasearch/2023/db4-2-merge/merge2/
```

For the data that shows, for each taxon, in which samples it is abundant

```
for i in [1-9] [1-9][0-9] [1-9][0-9][0-9] [1-9][0-9][0-9][0-9]; do echo bash run-sp.sh $i; done | xargs -I{} -P16 bash -c "{}"
```

```
#run-sp.sh: for each result file under directory $1, accumulate counts up each lineage path as a percentage of the sample total,
#then list the samples for every taxon sorted by percentage (descending)
i=$1 #1
echo $i
find $i|grep result$|while read i; do awk -F'\t' '$0!~"^unknown"{cnt+=$2; data[$1]=$2} END{for(i in data){n=split(i, arr, ";"); path=arr[1]; data2[path]+=data[i]; for(j=2;j<=n;j++){path=path";"arr[j]; data2[path]+=data[i]}}; for(i in data2){print FILENAME"\t"i"\t"data2[i]/cnt*100}}' $i; done|awk -F'\t' '{data[$2][$3][$1]=1} END{for(i in data){PROCINFO["sorted_in"]="@ind_num_desc"; for(j in data[i]){for(k in data[i][j]){print i"\t"j"\t"k}}}}' > species.$i.txt
```

```
cat species.*.txt|sort --parallel=16 --buffer-size=100G -t$'\t' -k1,1 -k2,2gr > species.all
```

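Once species.all exists, the top samples for a single taxon can be pulled out directly; the sketch below reuses the taxon path that appears in the sqlite example further down:

```
# Top 10 samples for one taxon path (path value taken from the sqlite query example below).
awk -F'\t' '$1=="Bacteria;Firmicutes;Bacilli;Acholeplasmatales;Acholeplasmataceae"' species.all | head
```
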
Creating a DB for searching

```
cat ../db_merge/db*.tsv|awk '{if($1=="id"){name=$2}else{print $0"\t"name}}' > db.tsv
sqlite3 species.db
CREATE TABLE IF NOT EXISTS data (sp_name TEXT, percent REAL, srr_name TEXT);
.mode tab
.import db.tsv data
CREATE INDEX index_sp_name ON data(sp_name);
```

```
(echo "CREATE TABLE IF NOT EXISTS data (sp_name TEXT, percent REAL, srr_name TEXT);"
echo ".mode tab";
for i in species.*.txt; do echo ".import $i data"; done
echo "CREATE INDEX index_sp_name ON data(sp_name);") | sqlite3 species.db

echo -e 'select count(distinct srr_name) from data' |sqlite3 species.db > species.srr.count
```

```
echo -e '.mode tabs\nselect * from data where sp_name = "Bacteria;Firmicutes;Bacilli;Acholeplasmatales;Acholeplasmataceae" order by percent desc' |sqlite3 species.db |sort -t$'\t' -k2,2gr|wc -l

#sp_name never contains double quotes, but it can contain single quotes, so issuing the SQL with sp_name wrapped in double quotes makes the search work
echo -e '.mode tabs\nselect * from data where sp_name = "'"$a"'" order by percent desc' |sqlite3 species.db
```

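A small helper along the same lines, assuming the double-quote convention noted above (the function name query_sp is only for illustration):

```
# Hypothetical wrapper: query one taxon path, relying on sp_name never containing a double quote.
query_sp () {
  local sp="$1"
  echo -e '.mode tabs\nselect * from data where sp_name = "'"$sp"'" order by percent desc' | sqlite3 species.db
}
# usage:
# query_sp "Bacteria;Firmicutes;Bacilli;Acholeplasmatales;Acholeplasmataceae" | head
```
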

# Analysis with SILVA

Create the following file as run-silva.sh:

```
i=$1 #fastq file name, e.g. <accession>.fastq.gz
work=$PWD
fastqdir=/suikou/files/r311/sra-fastq/SraId_aa.fastq
resultdir=/suikou/files/r311/sra-fastq-silva/SraId_aa
mkdir -p /tmp/$i/input
cd /tmp/$i
cp $fastqdir/$i input
/suikou/tool/yoshitake/pp/metagenome~silva_SSU+LSU -c 16 -m 32 -d 50 -t 0.99 input
gzip input/*.ssu.blast
cp input/*.ssu.blast.gz input/*.ssu.blast.filtered.name.lca.cnt2.input $resultdir
cd /tmp
rm -rf /tmp/$i
```

In /suikou/files/r311/sra-fastq-silva/SraId_aa, run the following command:

```
for i in `cat /suikou/files/r311/sra-fastq/SraId_aa`; do qsub -q all.q@m50* -j Y -N $i -pe def_slot 12 ~/qsubsh4 bash /suikou/files/r311/sra-fastq-silva/run-silva.sh $i.fastq.gz; done
```

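To see which accessions still have no result, something like the following can be used (a sketch; it assumes the result files are named <accession>.fastq.fasta.ssu.blast.filtered.name.lca.cnt2.input, as produced by the pipeline above):

```
# Accessions in SraId_aa that have no *.cnt2.input file in the result directory yet.
comm -23 \
  <(sort /suikou/files/r311/sra-fastq/SraId_aa) \
  <(ls /suikou/files/r311/sra-fastq-silva/SraId_aa | grep 'cnt2.input$' | sed 's/\.fastq.*//' | sort)
```
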
DDBJ

```
cd ~/metadb
for i in `cat SraId_ac`; do qsub -j Y -N $i ~/qsubsh8 bash ./run-silva.sh $i.fastq.gz; done
```

r251

```
for i in `cat SraId_ad`; do qsub -j Y -N $i -pe def_slot 12 ~/qsubsh4 bash ./run-silva.sh $i.fastq.gz; done
```

As an array job:

```
qsub run-job.sh SraId_ae
```

```
$ more run-job.sh
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#$ -pe def_slot 8
#$ -l mem_req=4.00G,s_vmem=4.00G
#$ -t 1:10000

source ~/.bashrc

echo "$*"
eval "$*"

n=$SGE_TASK_ID

echo $n

id=$1 #SraId_ab

i=`head -n $n $id|tail -n 1`
bash ./run-silva-range.sh $id $i.fastq.gz
```

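The task range is hard-coded to 1:10000 in run-job.sh; for a shorter ID list the range could instead be set at submission time, since qsub options given on the command line take precedence over the embedded #$ directives (a sketch):

```
# Match the array task range to the number of accessions in the list.
id=SraId_ae
qsub -t 1-$(wc -l < $id) run-job.sh $id
```
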
```
$ more run-silva-range.sh
id=$1 #SraId_ab
i=$2  #ERR0000.fastq.gz
work=$PWD
fastqdir=$work/$id.fastq
resultdir=$work/sra-fastq-silva/$id
mkdir -p /tmp/$i/input
mkdir -p $resultdir
cd /tmp/$i
cp $fastqdir/$i input
~/pp/metagenome~silva_SSU+LSU -c 8 -m 32 -d 50 -t 0.99 input
gzip input/*.ssu.blast
cp input/*.ssu.blast.gz input/*.ssu.blast.filtered.name.lca.cnt2.input $resultdir
cd /tmp
rm -rf /tmp/$i
```

Data transfer (shiro, ddbj) @r311:/data/sra-fastq
```
serv=shiro
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=a$i; ssh $serv mkdir metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:metadb_$id; done
```

Data transfer (m50v251n3) @m768:/suikou/files/m512/backup/r311/sra-fastq
```
serv=m50v251n3
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=a$i; ssh $serv mkdir /mnt/r251/metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:/mnt/r251/metadb_$id; done
```

Data transfer (r311) @m512:/data/backup/r311
```
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; mkdir metadb_$id; chmod 777 metadb_$id; cp -r sra-fastq/run-job-r311.sh sra-fastq/run-silva-range.sh sra-fastq/SraId_${id}* metadb_$id; done
```

Data transfer (t7500) @m768:/suikou/files/m512/backup/r311/sra-fastq
```
serv=t7500
for i in a; do id=c$i; ssh $serv mkdir /ddca2/metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:/ddca2/metadb_$id; done
```

Submitting the array jobs (ddbj, shiro)
```
serv=ddbj
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; cd metadb_$id; ls ; qsub -N $id run-job-$serv.sh SraId_$id; cd ..; done
```

Submitting the array jobs (r311, m50v251n3, t7500)
```
serv=r311
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; cd metadb_$id; ls ; qsub -p -1000 -N $id run-job-$serv.sh SraId_$id; cd ..; done
```

Only on shiro do jobs get rescheduled on their own, so force them to run in order:
```
qalter  -hold_jid ao ap #run ap once ao has finished

for i in m n o p q r s t u v w x y z; do id=e$i; echo qalter  -hold_jid $jd $id ; jd=$id; done
```

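The loop above only echoes the qalter commands; a variant that applies them directly, skipping the first ID (which has no predecessor to hold on), might look like this (a sketch):

```
# Chain the array jobs so each waits for the previous one (the first ID has nothing to wait for).
jd=""
for i in m n o p q r s t u v w x y z; do
  id=e$i
  if [ -n "$jd" ]; then qalter -hold_jid $jd $id; fi
  jd=$id
done
```
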
t7500
```
source /ddca/sge-8.1.8/default/common/settings.sh
serv=t7500
for i in b c d e f g h i j k l m n o p q r s t u v w x y z; do id=c$i; cd /ddca2/metadb_$id; ls ; sed -i 's/-c 8 /-c 4 /' run-silva-range.sh; qsub -p -1000 -N $id run-job-$serv.sh SraId_$id; cd ..; done

#if you want other jobs to take priority over these
sudo su -
source /ddca/sge-8.1.8/default/common/settings.sh
qalter -p 1000 <jobid>
```

## Checking progress

```
for i in m256y m50v251n3 shiro ddbj t7500; do ssh $i qstat|egrep " (h|)qw"|head -n 1; done
```

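A rough per-server count of waiting tasks, along the same lines (a sketch; array jobs show up as a single qstat line, so the count is approximate):

```
# Approximate number of queued (qw/hqw) entries on each server.
for i in m256y m50v251n3 shiro ddbj t7500; do
  echo -n "$i: "
  ssh $i qstat | egrep -c " (h|)qw"
done
```
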
# After the analysis finishes

On the server where the analysis ran:

```
cd metadb
tar zvcf log.tar.gz *.o*
mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/
mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/

#several directories at once
for i in metadb_ce metadb_cf metadb_cg metadb_ch metadb_ci; do echo $i; cd $i; tar zvcf log.tar.gz *.o*; mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/; mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/; cd ..; done

```

From m512:

```
cd /data/backup/r311/sra-fastq-silva-result
rsync -av --progress shiro:metadb_ah/sra-fastq-silva/SraId_[a-z][a-z] .
```

For m50v251n3 (run on m512):

```
cd /data/backup/r311/sra-fastq-silva-result
rsync -av --progress yoshitake@133.11.144.11:/data/metadb_dj/sra-fastq-silva/SraId_[a-z][a-z] .
```

For t7500's results, run from r311:

```
cd /mnt/backup/r311/sra-fastq-silva-result
rsync -av --progress t7500:/ddca2/metadb_c[mn]/sra-fastq-silva/SraId_[a-z][a-z] .
```

For results produced on r311:

```
id=bb
cd /data/backup/r311/metadb_$id
tar zvcf log.tar.gz *.o*
mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/
mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/
sudo mv -i sra-fastq-silva/SraId_[a-z][a-z] ../sra-fastq-silva-result/
cd ..
rm -rf metadb_$id

#several IDs at once
for id in ah ai dk dl dm do dp ; do echo $id; cd /data/backup/r311/metadb_$id; tar zvcf log.tar.gz *.o*; mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/; mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/; sudo mv -i sra-fastq-silva/SraId_[a-z][a-z] ../sra-fastq-silva-result/; cd ..; rm -rf metadb_$id; done

```

Transfer to the production server

/data/backup/r311/sra-fastq-silva-result@m512
```
for i in SraId_*; do echo $i; mkdir db/$i; cd db/$i; find ../../$i|grep .input$|xargs -I{} bash -c "ln -s {} ."; cd ../..; done
find db|grep  .input$|xargs zip input.zip
```

```
#same as above, but skip SraId_* sets that are already linked under db/
for i in SraId_*; do if [ ! -e db/$i ]; then echo $i; mkdir db/$i; cd db/$i; find ../../$i|grep .input$|xargs -I{} bash -c "ln -s {} ."; cd ../..; fi; done
find db|grep  .input$|xargs zip input.zip
```

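As a quick consistency check, the number of .input entries inside input.zip can be compared with what is on disk (optional sketch):

```
# These two counts should match if every linked .input file made it into the archive.
unzip -l input.zip | grep -c '\.input$'
find db | grep -c '\.input$'
```
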
Creating the merged DB files

```
for i in db/SraId_* ; do echo $i; cat `find $i` > db_merge/`basename $i`.tsv; done
```

Creating the count files at genus level

```
@/home/yoshitake/yoshitake/db_merge_genus

for i in `ls ../db_merge`; do echo $i; awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.genus ~/yoshitake/db_merge/$i > ~/yoshitake/db_merge_genus/$i; done

#to process with the grid engine instead
for i in `ls ~/yoshitake/db_merge`; do echo $i; qsub -pe def_slot 1 ~/qsubsh4 bash ~/yoshitake/run-db_merge_genus $i; done
```

Creating the count files at species level

```
@/home/yoshitake/yoshitake/db_merge_species

i=SraId_aa.tsv
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.species ~/yoshitake/db_merge/$i > ~/yoshitake/db_merge_species/$i

#to process with the grid engine instead
for i in `ls ~/yoshitake/db_merge`; do echo $i; qsub -pe def_slot 1 ~/qsubsh4 bash ~/yoshitake/run-db_merge_species $i; done
```

Splitting the merged DB files back into per-sample files

```
#all
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge/$i ; done

#genus
for i in `ls ../db_merge_genus/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge_genus/$i ; done

#species
for i in `ls ../db_merge_species/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge_species/$i ; done

```

# Changing the BLAST filter parameters

```
@~/work2/metagenome-db/chnage-blast-filter
#for i in /suikou/files/m512/backup/r311/sra-fastq/SraId_aa; do i0=`basename $i`; mkdir -p $i0; for j0 in `cat $i`; do echo "cd $i0; bash ../run-change-blast-filter.sh $i0 $j0"; done; done|xargs -I{} -P6 bash -c "{}"

#run when the result files are on m512
for i in /suikou/files/m512/backup/r311/sra-fastq-silva-result/SraId_*; do i0=`basename $i`; mkdir -p $i0; for j0 in `cat /suikou/files/m512/backup/r311/sra-fastq/$i0`; do echo "cd $i0; bash ../run-change-blast-filter.sh2 $i0 $j0"; done; done|xargs -I{} -P6 bash -c "{}"
```

```
#re-filter one sample's BLAST hits with the new thresholds and rebuild its .cnt2.input file
#i0=SraId_dn
#j0=SRR7870496
i0=$1
j0=$2
j=/suikou/files/r311/sra-fastq-silva-result/$i0/$j0.fastq.fasta
bitscore=100
top=0.95
ref=/suikou/db/silva/2018-10-10/SILVA_132_SSU-LSU_Ref.fasta

mkdir -p /tmp/$j0
workdir="$PWD"
cd /tmp/$j0
#keep hits with bitscore > $bitscore and within $top of each read's best bitscore,
#then require >90% identity and an alignment covering >80% of the read length
zcat $j.ssu.blast.gz|awk -F'\t' '$12>'$bitscore'{if(a[$1]==1){if($12>=topbit*'$top'){print $0}}else{a[$1]=1; topbit=$12; print $0}}'|awk 'FILENAME==ARGV[1]{if(FNR%4==1){name=substr($1,2)}else if(FNR%4==2){len[name]=length($0); l+=length($0); n++}} FILENAME==ARGV[2]{if(len[$1]==0){len[$1]=l/n}; if($3>90&&$4>len[$1]*0.8){print $0}}' <(zcat /suikou/files/m512/backup/r311/sra-fastq/$i0.fastq/$j0.fastq.gz) /dev/stdin > `basename $j`.ssu.blast.filtered

echo "##determine LCA"
awk -F'\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' $ref.name `basename $j`.ssu.blast.filtered > `basename $j`.ssu.blast.filtered.name

awk -F'\t' '
function searchLCA(data,  i, j, res, res2, str, n, stopflag){
 for(i in data){
  if(n==0){n=split(i,res,";")}
  else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}}
 }
 if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1};
 for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}}
 return str;
}
{
 if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0}
 else{data[$1]=1}
}
END{if(length(data)>0){print searchLCA(data)"\t"oldstr}}
' `basename $j`.ssu.blast.filtered.name > `basename $j`.ssu.blast.filtered.name.lca

#count reads per LCA lineage, then prefix each lineage with "root;"
awk -F'\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' `basename $j`.ssu.blast.filtered.name.lca > `basename $j`.ssu.blast.filtered.name.lca.cnt
awk -F'\t' '{print "root;"$0}' `basename $j`.ssu.blast.filtered.name.lca.cnt > `basename $j`.ssu.blast.filtered.name.lca.cnt2

echo -e "id\t`basename $j.ssu.blast.filtered.name.lca.cnt2|sed 's/.fasta.ssu.blast.filtered.name.lca.cnt2$//'`" > `basename $j`.ssu.blast.filtered.name.lca.cnt2.input
cat `basename $j`.ssu.blast.filtered.name.lca.cnt2 >> `basename $j`.ssu.blast.filtered.name.lca.cnt2.input

rm $j0.fastq.fasta.ssu.blast.filtered $j0.fastq.fasta.ssu.blast.filtered.name $j0.fastq.fasta.ssu.blast.filtered.name.lca $j0.fastq.fasta.ssu.blast.filtered.name.lca.cnt $j0.fastq.fasta.ssu.blast.filtered.name.lca.cnt2
mv `basename $j`.ssu.blast.filtered.name.lca.cnt2.input $workdir
cd $workdir
rmdir /tmp/$j0

```

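The driver loops above generate exactly this kind of command, so a single sample can also be reprocessed by hand, e.g. (accession values taken from the commented example at the top of the script):

```
# One-off rerun for a single accession, mirroring the command string built by the driver loop.
cd SraId_dn
bash ../run-change-blast-filter.sh2 SraId_dn SRR7870496
cd ..
```
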
Creating the DB files @m32s:~/work2/metagenome-db/chnage-blast-filter

```
mkdir db_merge
for i in SraId_* ; do echo $i; cat `find $i|grep input$` > db_merge/`basename $i`.tsv; done
mkdir db_merge_genus
#for i in `ls db_merge`; do echo $i; qsub -j y -pe def_slot 1 ~/qsubsh4 bash ./run-db_merge_genus $i; done
for i in `ls db_merge`; do echo "echo $i; bash ./run-db_merge_genus $i"; done|xargs -I{} -P6 bash -c "{}"
mkdir db_merge_species
#for i in `ls db_merge`; do echo $i; qsub -j y -pe def_slot 1 ~/qsubsh4 bash ./run-db_merge_species $i; done
for i in `ls db_merge`; do echo "echo $i; bash ./run-db_merge_species $i"; done|xargs -I{} -P6 bash -c "{}"
```

```
#cat run-db_merge_genus
i=$1
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/work2/metagenome-db/chnage-blast-filter/SILVA_132_SSU-LSU_Ref.fasta.name.genus ~/work2/metagenome-db/chnage-blast-filter/db_merge/$i > ~/work2/metagenome-db/chnage-blast-filter/db_merge_genus/$i
```

```
#cat run-db_merge_species
i=$1
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/work2/metagenome-db/chnage-blast-filter/SILVA_132_SSU-LSU_Ref.fasta.name.species ~/work2/metagenome-db/chnage-blast-filter/db_merge/$i > ~/work2/metagenome-db/chnage-blast-filter/db_merge_species/$i
```

Transfer to the production server, split, and deploy

```
#in ~/work2/metagenome-db/chnage-blast-filter @m32s
rsync -av --progress db_merge_genus/ m50v251n3:/usr/local/yoshitake/db_genus_merge/
rsync -av --progress db_merge_species/ m50v251n3:/usr/local/yoshitake/db_species_merge/
rsync -av --progress db_merge/ m50v251n3:/usr/local/yoshitake/db_merge/

ssh m50v251n3
cd /usr/local/yoshitake

#all
cd /usr/local/yoshitake/db
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge/$i ; done

#genus
cd /usr/local/yoshitake/db_genus
for i in `ls ../db_genus_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_genus_merge/$i ; done

#species
cd /usr/local/yoshitake/db_species
for i in `ls ../db_species_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_species_merge/$i ; done

for i in 1 2 4 5 6 7 8 9; do rsync -av --progress /usr/local/yoshitake m50v251n$i:/usr/local/; done
```


## Generating the HTML files from the command line

```
#@yoshitake/tt
cp -r ~/metasearch/tmp .
rm tmp/*/*.gz tmp/*/*.fq
#for i in tmp/*/*.tsv; do bash run-rerun.sh $i; done
for i in tmp/*/*.tsv; do echo bash run-rerun.sh $i; done|xargs -I {} -P 15 bash -c "{}"

for i in tmp/*/*/result.html; do echo "<a href='$i'>$i</a>"; done > list.html

```

```
#run-rerun.sh
#i=tmp/641b199531d9634a9710474c83f1cd63/641b199531d9634a9710474c83f1cd63.fq.tsv
i=$1


name="`cat $(dirname $i)/$(dirname $i|sed 's/tmp.//')/result.html |grep h3|head -n 1|sed 's/<\/.*//; s/.*>//'`"
#species
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//')
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.species $i > $i.species.tsv
./calccor $i.species.tsv ~/yoshitake/db_merge_species
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page_species.py "`echo $i.species.tsv|sed 's/.tsv$//'`" "$name"
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html
mv $(dirname $i)/$(dirname $i|sed 's/tmp.//') $(dirname $i)/$(dirname $i|sed 's/tmp.//')s

#genus
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//')
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.genus $i > $i.genus.tsv
./calccor $i.genus.tsv ~/yoshitake/db_merge_genus
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page_genus.py "`echo $i.genus.tsv|sed 's/.tsv$//'`" "$name"
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html
mv $(dirname $i)/$(dirname $i|sed 's/tmp.//') $(dirname $i)/$(dirname $i|sed 's/tmp.//')g

#redo the total (all-rank) result
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//')
./calccor $i ~/yoshitake/db_merge
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page.py "`echo $i|sed 's/.tsv$//'`" "$name"
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html

```
  