| |
sraデータをsilvaで解析 [2024/02/26 05:13] – 133.11.224.94 | sraデータをsilvaで解析 [Unknown date] (現在) – 削除 - 外部編集 (Unknown date) 127.0.0.1 |
---|
# パブリックデータの解析が終わったら 2024 | |
| |
``` | |
#cd /home/yoshitake.kazutoshi/work3/metasearch/2023/db4-2-merge | |
for i in `ls -d ../db4/[1-9]*|grep -v "[.]tar"`;do echo $i; j=`basename $i`; mkdir $j; cp $i/*.result $j; done | |
for i in `ls -d ../db4-2/[1-9]*|grep -v "[.]tar"`;do echo $i; j=`basename $i`; cp $i/*.result $j; done | |
| |
mkdir merge | |
for j in [1-9]*; do echo $j; if [ `find $j|grep result$|wc -l` != 0 ];then awk -F'\t' 'FNR==1{split(FILENAME,arr,"/"); split(arr[2],arr2,"."); print "id\t"arr2[1]} {print $0}' `find $j|grep result$` > merge/$j.db.tsv; fi; done | |
#file size 0を消しておく | |
find merge/ -size 0|xargs rm | |
| |
#前の結果と統合する | |
mkdir merge2 | |
find ~/work3/metasearch/2023/db3-3-merge/merge/|grep tsv$|while read i; do ln -s $i merge2/db3-`basename $i`; done | |
find ~/work3/metasearch/2023/db4-2-merge/merge/|grep tsv$|while read i; do ln -s $i merge2/db4-`basename $i`; done | |
| |
~/oldhome/work2/calccor/calccor test1 merge2/ | |
``` | |
| |
統合した結果から分割したい場合(metasearch DB用) | |
| |
``` | |
#m128m /suikou/download9-v251/metasearch/metasearch_dev/metasearch/data/db | |
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".input"} {print $0 > filename}' ../db_merge/$i ; done | |
``` | |
| |
テスト用の解析結果作成 | |
| |
``` | |
cd /home/yoshitake.kazutoshi/work3/metasearch/2023/db3-3-merge/test | |
srr=SRR19572742 | |
#bash run-download.sh $srr | |
bash ~/work3/metasearch/2023/db4-2/run-blast3-4.sh $srr | |
(echo -e "id\t$srr"; cat $srr.result) > $srr.input | |
~/oldhome/work2/calccor/calccor $srr.input ~/work3/metasearch/2023/db4-2-merge/merge2/ | |
``` | |
| |
生物種ごとにどこに多いかを表示するデータ用 | |
| |
``` | |
for i in [1-9] [1-9][0-9] [1-9][0-9][0-9] [1-9][0-9][0-9][0-9]; do echo bash run-sp.sh $i; done | xargs -I{} -P16 bash -c "{}" | |
``` | |
| |
``` | |
i=$1 #1 | |
echo $i | |
find $i|grep result$|while read i; do awk -F'\t' '$0!~"^unknown"{cnt+=$2; data[$1]=$2} END{for(i in data){n=split(i, arr, ";"); path=arr[1]; data2[path]+=data[i]; for(j=2;j<=n;j++){path=path";"arr[j]; data2[path]+=data[i]}}; for(i in data2){print FILENAME"\t"i"\t"data2[i]/cnt*100}}' $i; done|awk -F'\t' '{data[$2][$3][$1]=1} END{for(i in data){PROCINFO["sorted_in"]="@ind_num_desc"; for(j in data[i]){for(k in data[i][j]){print i"\t"j"\t"k}}}}' > species.$i.txt | |
``` | |
| |
``` | |
cat species.*.txt|sort --parallel=16 --buffer-size=100G -t$'\t' -k1,1 -k2,2gr > species.all | |
``` | |
| |
検索用にDB作成 | |
| |
``` | |
cat db_merge/db*.tsv|awk -F'\t' '{if($1=="id"){if(name!=""){delete res; for(i in data){c=data[i]/cnt*100; split(i,arr,";"); str=arr[1]; res[str]+=c; for(j=2;j<=length(arr);j++){str=str";"arr[j]; res[str]+=c}}; for(i in res){print i"\t"res[i]"\t"name}}; delete data; name=$2; cnt=0}else{data[$1]=$2; cnt+=$2}} END{delete res; for(i in data){c=data[i]/cnt*100; split(i,arr,";"); str=arr[1]; res[str]+=c; for(j=2;j<=length(arr);j++){str=str";"arr[j]; res[str]+=c}}; for(i in res){print i"\t"res[i]"\t"name}}' > db.tsv | |
sqlite3 species.db | |
CREATE TABLE IF NOT EXISTS data (sp_name TEXT, percent REAL, srr_name TEXT); | |
.mode tab | |
.import db.tsv data | |
CREATE INDEX index_sp_name ON data(sp_name); | |
.quit | |
a="Eukaryota;Obazoa;Opisthokonta;Fungi;Basidiomycota;Agaricomycotina;Agaricomycetes;Lentinus;Lentinus_crinitus" | |
echo -e '.mode tabs\nselect * from data where sp_name = "'"$a"'" order by percent desc' |sqlite3 species.db | |
``` | |
| |
``` | |
(echo "CREATE TABLE IF NOT EXISTS data (sp_name TEXT, percent REAL, srr_name TEXT);" | |
echo ".mode tab"; | |
for i in species.*.txt; do echo ".import $i data"; done | |
echo "CREATE INDEX index_sp_name ON data(sp_name);") | sqlite3 species.db | |
| |
echo -e 'select count(distinct srr_name) from data' |sqlite3 species.db > species.srr.count | |
``` | |
| |
``` | |
echo -e '.mode tabs\nselect * from data where sp_name = "Bacteria;Firmicutes;Bacilli;Acholeplasmatales;Acholeplasmataceae" order by percent desc' |sqlite3 species.db |sort -t$'\t' -k2,2gr|wc -l | |
| |
sp_nameに「"」はないけど、「'」はある。なのでsp_nameを「"」で囲むようにSQLを発行すれば検索できる | |
echo -e '.mode tabs\nselect * from data where sp_name = "'"$a"'" order by percent desc' |sqlite3 species.db | |
``` | |
| |
SRRファイル検索用一覧表作成 | |
| |
``` | |
find db|grep input$ > input.list | |
echo -e '.mode tabs\nselect * from data where sp_name = "'"$a"'" order by percent desc' |sqlite3 species.db | |
``` | |
| |
| |
# SILVAで解析 | |
| |
run-silva.shとして下記ファイルを作成 | |
| |
``` | |
i=$1 | |
work=$PWD | |
fastqdir=/suikou/files/r311/sra-fastq/SraId_aa.fastq | |
resultdir=/suikou/files/r311/sra-fastq-silva/SraId_aa | |
mkdir -p /tmp/$i/input | |
cd /tmp/$i | |
cp $fastqdir/$i input | |
/suikou/tool/yoshitake/pp/metagenome~silva_SSU+LSU -c 16 -m 32 -d 50 -t 0.99 input | |
gzip input/*.ssu.blast | |
cp input/*.ssu.blast.gz input/*.ssu.blast.filtered.name.lca.cnt2.input $resultdir | |
cd /tmp | |
rm -rf /tmp/$i | |
``` | |
| |
/suikou/files/r311/sra-fastq-silva/SraId_aa | |
| |
にて下記コマンド | |
| |
``` | |
for i in `cat /suikou/files/r311/sra-fastq/SraId_aa`; do qsub -q all.q@m50* -j Y -N $i -pe def_slot 12 ~/qsubsh4 bash /suikou/files/r311/sra-fastq-silva/run-silva.sh $i.fastq.gz; done | |
``` | |
| |
DDBJ | |
| |
``` | |
cd ~/metadb | |
for i in `cat SraId_ac`; do qsub -j Y -N $i ~/qsubsh8 bash ./run-silva.sh $i.fastq.gz; done | |
``` | |
| |
r251 | |
| |
``` | |
for i in `cat SraId_ad`; do qsub -j Y -N $i -pe def_slot 12 ~/qsubsh4 bash ./run-silva.sh $i.fastq.gz; done | |
``` | |
| |
アレイジョブで | |
| |
``` | |
qsub run-job.sh SraId_ae | |
``` | |
| |
``` | |
$ more run-job.sh | |
#!/bin/bash | |
#$ -S /bin/bash | |
#$ -cwd | |
#$ -pe def_slot 8 | |
#$ -l mem_req=4.00G,s_vmem=4.00G | |
#$ -t 1:10000 | |
| |
source ~/.bashrc | |
| |
echo "$*" | |
eval "$*" | |
| |
n=$SGE_TASK_ID | |
| |
echo $n | |
| |
id=$1 #SraId_ab | |
| |
i=`head -n $n $id|tail -n 1` | |
bash ./run-silva-range.sh $id $i.fastq.gz | |
``` | |
| |
``` | |
$ more run-silva-range.sh | |
id=$1 #SraId_ab | |
i=$2 #ERR0000.fastq.gz | |
work=$PWD | |
fastqdir=$work/$id.fastq | |
resultdir=$work/sra-fastq-silva/$id | |
mkdir -p /tmp/$i/input | |
mkdir -p $resultdir | |
cd /tmp/$i | |
cp $fastqdir/$i input | |
~/pp/metagenome~silva_SSU+LSU -c 8 -m 32 -d 50 -t 0.99 input | |
gzip input/*.ssu.blast | |
cp input/*.ssu.blast.gz input/*.ssu.blast.filtered.name.lca.cnt2.input $resultdir | |
cd /tmp | |
rm -rf /tmp/$i | |
``` | |
| |
データ転送(shiro, ddbj) @r311:/data/sra-fastq | |
``` | |
serv=shiro | |
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=a$i; ssh $serv mkdir metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:metadb_$id; done | |
``` | |
| |
データ転送(m50v251n3) @m768:/suikou/files/m512/backup/r311/sra-fastq | |
``` | |
serv=m50v251n3 | |
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=a$i; ssh $serv mkdir /mnt/r251/metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:/mnt/r251/metadb_$id; done | |
``` | |
| |
データ転送(r311) @m512:/data/backup/r311 | |
``` | |
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; mkdir metadb_$id; chmod 777 metadb_$id; cp -r sra-fastq/run-job-r311.sh sra-fastq/run-silva-range.sh sra-fastq/SraId_${id}* metadb_$id; done | |
``` | |
| |
データ転送(t7500) @m768:/suikou/files/m512/backup/r311/sra-fastq | |
``` | |
serv=t7500 | |
for i in a; do id=c$i; ssh $serv mkdir /ddca2/metadb_$id; scp -r run-silva-range.sh run-job-$serv.sh SraId_${id}* $serv:/ddca2/metadb_$id; done | |
``` | |
| |
アレイジョブ投入(ddbj, shiro) | |
``` | |
serv=ddbj | |
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; cd metadb_$id; ls ; qsub -N $id run-job-$serv.sh SraId_$id; cd ..; done | |
``` | |
| |
アレイジョブ投入(r311, m50v251n3, t7500) | |
``` | |
serv=r311 | |
for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do id=b$i; cd metadb_$id; ls ; qsub -p -1000 -N $id run-job-$serv.sh SraId_$id; cd ..; done | |
``` | |
| |
shiroのみジョブが勝手に再スケジュールされるので、順番通りに実行されるように強制 | |
``` | |
qalter -hold_jid ao ap #aoが終わったらapを実行 | |
| |
for i in m n o p q r s t u v w x y z; do id=e$i; echo qalter -hold_jid $jd $id ; jd=$id; done | |
``` | |
| |
t7500 | |
``` | |
source /ddca/sge-8.1.8/default/common/settings.sh | |
serv=t7500 | |
for i in b c d e f g h i j k l m n o p q r s t u v w x y z; do id=c$i; cd /ddca2/metadb_$id; ls ; sed -i 's/-c 8 /-c 4 /' run-silva-range.sh; qsub -p -1000 -N $id run-job-$serv.sh SraId_$id; cd ..; done | |
| |
#ほかのジョブを優先的に投げたい場合は | |
sudo su - | |
source /ddca/sge-8.1.8/default/common/settings.sh | |
qalter -p 1000 <jobid> | |
``` | |
| |
## 進行状況確認 | |
| |
``` | |
for i in m256y m50v251n3 shiro ddbj t7500; do ssh $i qstat|egrep " (h|)qw"|head -n 1; done | |
``` | |
| |
#解析が終わったら | |
| |
解析したサーバ上で | |
| |
``` | |
cd metadb | |
tar zvcf log.tar.gz *.o* | |
mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/ | |
mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/ | |
| |
#まとめて | |
for i in metadb_ce metadb_cf metadb_cg metadb_ch metadb_ci; do echo $i; cd $i; tar zvcf log.tar.gz *.o*; mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/; mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/; cd ..; done | |
| |
``` | |
| |
m512から | |
| |
``` | |
cd /data/backup/r311/sra-fastq-silva-result | |
rsync -av --progress shiro:metadb_ah/sra-fastq-silva/SraId_[a-z][a-z] . | |
``` | |
| |
m50v251n3の場合@m512 | |
| |
``` | |
cd /data/backup/r311/sra-fastq-silva-result | |
rsync -av --progress yoshitake@133.11.144.11:/data/metadb_dj/sra-fastq-silva/SraId_[a-z][a-z] . | |
``` | |
| |
t7500にr311から | |
| |
``` | |
cd /mnt/backup/r311/sra-fastq-silva-result | |
rsync -av --progress t7500:/ddca2/metadb_c[mn]/sra-fastq-silva/SraId_[a-z][a-z] . | |
``` | |
| |
r311での結果の場合 | |
| |
``` | |
id=bb | |
cd /data/backup/r311/metadb_$id | |
tar zvcf log.tar.gz *.o* | |
mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/ | |
mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/ | |
sudo mv -i sra-fastq-silva/SraId_[a-z][a-z] ../sra-fastq-silva-result/ | |
cd .. | |
rm -rf metadb_$id | |
| |
#まとめて | |
for id in ah ai dk dl dm do dp ; do echo $id; cd /data/backup/r311/metadb_$id; tar zvcf log.tar.gz *.o*; mv log.tar.gz sra-fastq-silva/SraId_[a-z][a-z]/; mv run*.sh sra-fastq-silva/SraId_[a-z][a-z]/; sudo mv -i sra-fastq-silva/SraId_[a-z][a-z] ../sra-fastq-silva-result/; cd ..; rm -rf metadb_$id; done | |
| |
``` | |
| |
本番サーバへ転送 | |
| |
/data/backup/r311/sra-fastq-silva-result@m512 | |
``` | |
for i in SraId_*; do echo $i; mkdir db/$i; cd db/$i; find ../../$i|grep .input$|xargs -I{} bash -c "ln -s {} ."; cd ../..; done | |
find db|grep .input$|xargs zip input.zip | |
``` | |
| |
``` | |
for i in SraId_*; do if [ ! -e db/$i ]; then echo $i; mkdir db/$i; cd db/$i; find ../../$i|grep .input$|xargs -I{} bash -c "ln -s {} ."; cd ../..; fi; done | |
find db|grep .input$|xargs zip input.zip | |
``` | |
| |
まとめたDBファイルの作成 | |
| |
``` | |
for i in db/SraId_* ; do echo $i; cat `find $i` > db_merge/`basename $i`.tsv; done | |
``` | |
| |
genusでのカウントファイル作成 | |
| |
``` | |
@/home/yoshitake/yoshitake/db_merge_genus | |
| |
for i in `ls ../db_merge`; do echo $i; awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!="") | |
{print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.genus ~/yoshitake/db_merge/$i > ~/yoshitake/db_merge_genus/$i; done | |
| |
#グリッドエンジンで処理するには | |
for i in `ls ~/yoshitake/db_merge`; do echo $i; qsub -pe def_slot 1 ~/qsubsh4 bash ~/yoshitake/run-db_merge_genus $i; done | |
``` | |
| |
speciesでのカウントファイル作成 | |
| |
``` | |
@/home/yoshitake/yoshitake/db_merge_species | |
| |
i=SraId_aa.tsv | |
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!="") | |
{print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.species ~/yoshitake/db_merge/$i > ~/yoshitake/db_merge_species/$i | |
| |
#グリッドエンジンで処理するには | |
for i in `ls ~/yoshitake/db_merge`; do echo $i; qsub -pe def_slot 1 ~/qsubsh4 bash ~/yoshitake/run-db_merge_species $i; done | |
``` | |
| |
マージしたDBファイルをバラバラにする | |
| |
``` | |
#all | |
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge/$i ; done | |
| |
#genus | |
for i in `ls ../db_merge_genus/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge_genus/$i ; done | |
| |
#species | |
for i in `ls ../db_merge_species/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge_species/$i ; done | |
| |
``` | |
| |
#blastのフィルターパラメータ変更 | |
| |
``` | |
@~/work2/metagenome-db/chnage-blast-filter | |
#for i in /suikou/files/m512/backup/r311/sra-fastq/SraId_aa; do i0=`basename $i`; mkdir -p $i0; for j0 in `cat $i`; do echo "cd $i0; bash ../run-change-blast-filter.sh $i0 $j0"; done; done|xargs -I{} -P6 bash -c "{}" | |
| |
#m512に結果ファイルがある状態での実行 | |
for i in /suikou/files/m512/backup/r311/sra-fastq-silva-result/SraId_*; do i0=`basename $i`; mkdir -p $i0; for j0 in `cat /suikou/files/m512/backup/r311/sra-fastq/$i0`; do echo "cd $i0; bash ../run-change-blast-filter.sh2 $i0 $j0"; done; done|xargs -I{} -P6 bash -c "{}" | |
``` | |
| |
``` | |
#i0=SraId_dn | |
#j0=SRR7870496 | |
i0=$1 | |
j0=$2 | |
j=/suikou/files/r311/sra-fastq-silva-result/$i0/$j0.fastq.fasta | |
bitscore=100 | |
top=0.95 | |
ref=/suikou/db/silva/2018-10-10/SILVA_132_SSU-LSU_Ref.fasta | |
| |
mkdir -p /tmp/$j0 | |
workdir="$PWD" | |
cd /tmp/$j0 | |
zcat $j.ssu.blast.gz|awk -F'\t' '$12>'$bitscore'{if(a[$1]==1){if($12>=topbit*'$top'){print $0}}else{a[$1]=1; topbit=$12; print $0}}'|awk 'FILENAME==ARGV[1]{if(FNR%4==1){name=substr($1,2)}else if(FNR%4==2){len[name]=length($0); l+=length($0); n++}} FILENAME==ARGV[2]{if(len[$1]==0){len[$1]=l/n}; if($3>90&&$4>len[$1]*0.8){print $0}}' <(zcat /suikou/files/m512/backup/r311/sra-fastq/$i0.fastq/$j0.fastq.gz) /dev/stdin > `basename $j`.ssu.blast.filtered | |
| |
echo "##determine LCA" | |
awk -F'\t' 'FILENAME==ARGV[1]{name[$1]=$2} FILENAME==ARGV[2]{print name[$2]"\t"$0}' $ref.name `basename $j`.ssu.blast.filtered > `basename $j`.ssu.blast.filtered.name | |
| |
awk -F'\t' ' | |
function searchLCA(data, i, j, res, res2, str, n, stopflag){ | |
for(i in data){ | |
if(n==0){n=split(i,res,";")} | |
else{split(i,res2,";"); for(j in res){if(res[j]!=res2[j]){res[j]=""}}} | |
} | |
if(res[1]!=""){str=res[1]}else{str="unknown"; stopflag=1}; | |
for(i=2;i<=n;i++){if(stopflag==0 && res[i]!=""){str=str";"res[i]}else{stopflag=1}} | |
return str; | |
} | |
{ | |
if($2!=old){if(old!=""){print searchLCA(data)"\t"oldstr}; delete data; data[$1]=1; old=$2; oldstr=$0} | |
else{data[$1]=1} | |
} | |
END{if(length(data)>0){print searchLCA(data)"\t"oldstr}} | |
' `basename $j`.ssu.blast.filtered.name > `basename $j`.ssu.blast.filtered.name.lca | |
| |
awk -F'\t' '{cnt[$1]++} END{PROCINFO["sorted_in"]="@val_num_desc"; for(i in cnt){print i"\t"cnt[i]}}' `basename $j`.ssu.blast.filtered.name.lca > `basename $j`.ssu.blast.filtered.name.lca.cnt | |
awk -F'\t' '{print "root;"$0}' `basename $j`.ssu.blast.filtered.name.lca.cnt > `basename $j`.ssu.blast.filtered.name.lca.cnt2 | |
| |
echo -e "id\t`basename $j.ssu.blast.filtered.name.lca.cnt2|sed 's/.fasta.ssu.blast.filtered.name.lca.cnt2$//'`" > `basename $j`.ssu.blast.filtered.name.lca.cnt2.input | |
cat `basename $j`.ssu.blast.filtered.name.lca.cnt2 >> `basename $j`.ssu.blast.filtered.name.lca.cnt2.input | |
| |
rm $j0.fastq.fasta.ssu.blast.filtered $j0.fastq.fasta.ssu.blast.filtered.name $j0.fastq.fasta.ssu.blast.filtered.name.lca $j0.fastq.fasta.ssu.blast.filtered.name.lca.cnt $j0.fastq.fasta.ssu.blast.filtered.name.lca.cnt2 | |
mv `basename $j`.ssu.blast.filtered.name.lca.cnt2.input $workdir | |
cd $workdir | |
rmdir /tmp/$j0 | |
| |
``` | |
| |
DBファイルの作成 @m32s:~/work2/metagenome-db/chnage-blast-filter | |
| |
``` | |
mkdir db_merge | |
for i in SraId_* ; do echo $i; cat `find $i|grep input$` > db_merge/`basename $i`.tsv; done | |
mkdir db_merge_genus | |
#for i in `ls db_merge`; do echo $i; qsub -j y -pe def_slot 1 ~/qsubsh4 bash ./run-db_merge_genus $i; done | |
for i in `ls db_merge`; do echo "echo $i; bash ./run-db_merge_genus $i"; done|xargs -I{} -P6 bash -c "{}" | |
mkdir db_merge_species | |
#for i in `ls db_merge`; do echo $i; qsub -j y -pe def_slot 1 ~/qsubsh4 bash ./run-db_merge_species $i; done | |
for i in `ls db_merge`; do echo "echo $i; bash ./run-db_merge_species $i"; done|xargs -I{} -P6 bash -c "{}" | |
``` | |
| |
``` | |
#cat run-db_merge_genus | |
i=$1 | |
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/work2/metagenome-db/chnage-blast-filter/SILVA_132_SSU-LSU_Ref.fasta.name.genus ~/work2/metagenome-db/chnage-blast-filter/db_merge/$i > ~/work2/metagenome-db/chnage-blast-filter/db_merge_genus/$i | |
``` | |
| |
``` | |
#cat run-db_merge_species | |
i=$1 | |
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/work2/metagenome-db/chnage-blast-filter/SILVA_132_SSU-LSU_Ref.fasta.name.species ~/work2/metagenome-db/chnage-blast-filter/db_merge/$i > ~/work2/metagenome-db/chnage-blast-filter/db_merge_species/$i | |
``` | |
| |
本番サーバへ転送して分割して配置 | |
| |
``` | |
#in ~/work2/metagenome-db/chnage-blast-filter @m32s | |
rsync -av --progress db_merge_genus/ m50v251n3:/usr/local/yoshitake/db_genus_merge/ | |
rsync -av --progress db_merge_species/ m50v251n3:/usr/local/yoshitake/db_species_merge/ | |
rsync -av --progress db_merge/ m50v251n3:/usr/local/yoshitake/db_merge/ | |
| |
ssh m50v251n3 | |
cd /usr/local/yoshitake | |
| |
#all | |
cd /usr/local/yoshitake/db | |
for i in `ls ../db_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_merge/$i ; done | |
| |
#genus | |
cd /usr/local/yoshitake/db_genus | |
for i in `ls ../db_genus_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_genus_merge/$i ; done | |
| |
#species | |
cd /usr/local/yoshitake/db_species | |
for i in `ls ../db_species_merge/`; do echo $i; j=`basename $i .tsv`; mkdir -p $j; awk -F'\t' -v dir=$j '$1=="id"{filename=dir"/"$2".fasta.ssu.blast.filtered.name.lca.cnt2.input"} {print $0 > filename}' ../db_species_merge/$i ; done | |
| |
for i in 1 2 4 5 6 7 8 9; do rsync -av --progress /usr/local/yoshitake m50v251n$i:/usr/local/; done | |
``` | |
| |
| |
##HTMLファイルをコマンドから作成 | |
| |
``` | |
#@yoshitake/tt | |
cp -r ~/metasearch/tmp . | |
rm tmp/*/*.gz tmp/*/*.fq | |
#for i in tmp/*/*.tsv; do bash run-rerun.sh $i; done | |
for i in tmp/*/*.tsv; do echo bash run-rerun.sh $i; done|xargs -I {} -P 15 bash -c "{}" | |
| |
for i in tmp/*/*/result.html; do echo "<a href='$i'>$i</a>"; done > list.html | |
| |
``` | |
| |
``` | |
#i=tmp/641b199531d9634a9710474c83f1cd63/641b199531d9634a9710474c83f1cd63.fq.tsv | |
i=$1 | |
| |
| |
name="`cat $(dirname $i)/$(dirname $i|sed 's/tmp.//')/result.html |grep h3|head -n 1|sed 's/<\/.*//; s/.*>//'`" | |
#species | |
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//') | |
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.species $i > $i.species.tsv | |
./calccor $i.species.tsv ~/yoshitake/db_merge_species | |
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page_species.py "`echo $i.species.tsv|sed 's/.tsv$//'`" "$name" | |
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html | |
mv $(dirname $i)/$(dirname $i|sed 's/tmp.//') $(dirname $i)/$(dirname $i|sed 's/tmp.//')s | |
| |
#genus | |
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//') | |
awk -F'\t' 'FILENAME==ARGV[1]{a["root;"$2]=$3} FILENAME==ARGV[2]{if($1=="id"){if(FNR>1){for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}; print $0; delete cnt}else{cnt[a[$1]]+=$2}} END{for(i in cnt){if(i!=""){print i"\t"cnt[i]}}}' ~/yoshitake/SILVA_132_SSU-LSU_Ref.fasta.name.genus $i > $i.genus.tsv | |
./calccor $i.genus.tsv ~/yoshitake/db_merge_genus | |
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page_genus.py "`echo $i.genus.tsv|sed 's/.tsv$//'`" "$name" | |
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html | |
mv $(dirname $i)/$(dirname $i|sed 's/tmp.//') $(dirname $i)/$(dirname $i|sed 's/tmp.//')g | |
| |
#totalやり直し | |
mkdir -p $(dirname $i)/$(dirname $i|sed 's/tmp.//') | |
./calccor $i ~/yoshitake/db_merge | |
singularity exec ~/metasearch/script/python3_env_mako_installed.sif python create_page.py "`echo $i|sed 's/.tsv$//'`" "$name" | |
singularity run ~/metasearch/script/krona_v2.7.1_cv1.sif ktImportText $(dirname $i)/result.kraken -o $(dirname $i)/$(dirname $i|sed 's/tmp.//')/krona_out.html | |
| |
``` | |
| |