[root@banner-web organism]# find . -name *_cds.fa >z-summary.cds.fa 
[root@banner-web organism]# find . -name *_pro.fa >z-summary.pro.fa 

:%s/.*\///g
[root@banner-web organism]# wc -l z-summary.cds.fa
4666 z-summary.cds.fa
[root@banner-web organism]# sort z-summary.cds.fa | uniq | wc -l
2574

[root@banner-web organism]# wc -l z-summary.pro.fa
3372 z-summary.pro.fa
[root@banner-web organism]# sort z-summary.pro.fa | uniq | wc -l
1922

# List every file under "." whose name matches *_cds*html, then print the
# total number of entries and the number of distinct entries.
# The pattern is quoted so that find receives it verbatim; unquoted, the shell
# would expand it first if a matching file existed in the current directory.
# NOTE(review): the list holds full paths, which are unique by construction,
# so the distinct count equals the total unless the directory part is stripped
# from the list first.
find . -name '*_cds*html' >z-summary.cds.html
wc -l z-summary.cds.html
sort -u z-summary.cds.html | wc -l

find ./ -name '*html' |while read i
> do
> sed -i 's/mamual/Manual/g'  $i
> done

sed: couldn't edit ./Rosaceae/Pyrus/Pyrus_betulifolia: not a regular file
sed: can't read ./Solanaceae/Solanum/Solanum_sogarandinum/Solanum_sogarandinum_PG4032/S-gene_cds/Solanum_sogarandinum_PG4032_SLF15-2_cds: No such file or directory
sed: can't read .html: No such file or directory
sed: can't read ./Solanaceae/Solanum/Solanum_spirale/Solanum_spirale_gwh_YDDXSA095/S-gene_cds/Solanum_spirale_gwh_YDDXSA095_SLF18-3_cds: No such file or directory
sed: can't read .html: No such file or directory
## Cause of the errors above: a space inside the file name (here before ".html"), e.g. ./Solanaceae/Solanum/Solanum_sogarandinum/Solanum_sogarandinum_PG4032/S-gene_cds/Solanum_sogarandinum_PG4032_SLF15-2_cds .html
./Solanaceae/Solanum/Solanum_spirale/Solanum_spirale_gwh_YDDXSA095/S-gene_cds/Solanum_spirale_gwh_YDDXSA095_SLF18-3_cds .html
Pyrus_betulifolia - 副本.html

# Replace the typo "Mamual" with "Manual" in every .html file below "./".
# find hands the paths straight to sed, so names containing spaces are passed
# intact instead of being split into several words by the shell.
# -type f restricts the edit to regular files, the only kind sed -i can edit.
find ./ -type f -name '*.html' -exec sed -i 's/Mamual/Manual/g' {} +

# Build the phylogeny of all S genes,
# including both the sequences retrieved from NCBI and the newly identified ones.
# The NCBI sequences are in blastDB; the newly identified ones are under this path.
# Pool every S-RNase protein FASTA below "." into one collection file.
# - The output is overwritten (">"), so re-running does not append duplicates.
# - *_identified.fa is excluded because the collection's own name also matches
#   *ase*pro*a and would otherwise be fed back into itself.
# - awk drops a trailing CR from each line and newline-terminates every input,
#   so a ">" header cannot end up glued to the previous sequence line.
# - Paths go from find directly to awk, so names containing spaces are safe.
find . -type f -name '*ase*pro*a' ! -name '*_identified.fa' \
  -exec awk '{ sub(/\r$/, ""); print }' {} + >Type1_S-RNase_proteins_identified.fa
:%s/>/\r>/g
#20241214
# Rebuild the pooled Type1 FASTA collections from the per-gene files below ".".
#
# collect_fasta OUT PATTERN...
#   Writes to OUT the contents of every regular file under "." whose name
#   matches at least one PATTERN (a find -name glob).
#   - OUT is overwritten, so re-running does not append duplicates.
#   - A file that matches several PATTERNs is included only once.
#   - OUT itself and any *_identified.fa file are skipped: the collection
#     names also match patterns such as *SLF*pro*a or *ase*cds*a.
#   - A trailing CR is removed from each line and every input is
#     newline-terminated, so a ">" header is never glued to the previous
#     sequence line.
#   - Paths go from find directly to awk, so names containing spaces are safe.
#   - Sequences appear in find traversal order.
#   Returns 2 when no PATTERN is given, otherwise the exit status of find.
collect_fasta() {
  local out=$1
  shift || return 2
  (( $# > 0 )) || return 2

  # Build "-o -name P1 -o -name P2 ..."; the leading -o is dropped below.
  local -a name_expr=()
  local pattern
  for pattern in "$@"; do
    name_expr+=(-o -name "$pattern")
  done

  find . -type f ! -name "${out##*/}" ! -name '*_identified.fa' \
    \( "${name_expr[@]:1}" \) \
    -exec awk '{ sub(/\r$/, ""); print }' {} + >"$out"
}

collect_fasta Type1_S-RNase_proteins_identified.fa '*ase*pro*a'
collect_fasta Type1_S-RNase_cds_identified.fa '*ase*cds*a'
collect_fasta Type1_SLF_proteins_identified.fa \
  '*SLF*pro*a' '*SFB*pro*a' '*FBX*pro*a' '*box*pro*a'
collect_fasta Type1_SLF_cds_identified.fa \
  '*SLF*cds*a' '*SFB*cds*a' '*FBX*cds*a' '*box*cds*a'
:%s/>/\r>/g
:%s/\r//gc
[root@banner-web organism]# grep '>' Type1_SLF_cds_identified.fa | wc -l
4372
[root@banner-web organism]# grep '>' Type1_SLF_proteins_identified.fa | wc -l
2997
[root@banner-web organism]# grep '>' Type1_S-RNase_cds_identified.fa | wc -l
440
[root@banner-web organism]# grep '>' Type1_S-RNase_proteins_identified.fa | wc -l
434
# Make sure the S-gene directories are present when counting the number of identified genes.
[root@banner-web organism]#  grep '>' Type1_SLF_cds_identified.fa | wc -l
4501
[root@banner-web organism]# grep '>' Type1_SLF_proteins_identified.fa | wc -l
3122
[root@banner-web organism]# grep '>' Type1_S-RNase_cds_identified.fa | wc -l
442
[root@banner-web organism]# grep '>' Type1_S-RNase_proteins_identified.fa | wc -l
436

#20241217
[root@banner-web organism]# find ./Brassicaceae -name "*pro*a" | wc -l
72
[root@banner-web organism]# find ./Brassicaceae -name "*SCR*pro*a" | wc -l
36
[root@banner-web organism]# find ./Brassicaceae -name "*SRK*pro*a" | wc -l
36
[root@banner-web organism]# find ./Brassicaceae -name "*cds*a" | wc -l
82
[root@banner-web organism]# find ./Brassicaceae -name "*SCR*cds*a" | wc -l
36
[root@banner-web organism]# find ./Brassicaceae -name "*SRK*cds*a" | wc -l
36
[root@banner-web organism]# find ./Brassicaceae -name "*cds*a" | grep -v "SCR" | grep -v 'SRK' | less
[root@banner-web organism]# find ./Brassicaceae -name "*cds*a" | grep -v "SCR" | grep -v 'SRK' 
./Brassicaceae/Arabidopsis/Arabidopsis_suecica/Arabidopsis_suecica_ASS3/s-genes.cds.fa
./Brassicaceae/Arabidopsis/Arabidopsis_lyrata/Arabidopsis_lyrata_ALyr_1.0/s-genes.cds.fa
./Brassicaceae/Arabidopsis/Arabidopsis_thaliana/Arabidopsis_thaliana_Col-CC/s_ath.cds.fa
./Brassicaceae/Arabidopsis/Arabidopsis_arenosa/Arabidopsis_arenosa_UiO_Aaren_v1.0/s-genes.cds.fa
./Brassicaceae/Brassica/Brassica_juncea/Brassica_juncea_ASM1870372v1/s-gene.cds.fa
./Brassicaceae/Brassica/Brassica_oleracea/Brassica_oleracea_ASM3463897v1/s-genes.cds.fa
./Brassicaceae/Brassica/Brassica_cretica/Brassica_cretica_B_cretica_A_v2/s.genes.cds.fasta
./Brassicaceae/Brassica/Brassica_nigra/Brassica_nigra_NI100/s-genes.cds.fa
./Brassicaceae/Brassica/Brassica_carinata/Brassica_carinata_PGLv1/s-genes.cds.fa
./Brassicaceae/Brassica/Brassica_rapa/Brassica_rapa_Chiifu_V4.0/s_brapa.cds.fa

[root@banner-web organism]# find ./Brassicaceae -name "*SCR*pro*a" | while read i ; do cat $i >> Type2_SCR_proteins_identified.fa ; done 
[root@banner-web organism]# find ./Brassicaceae -name "*SRK*pro*a" | while read i ; do cat $i >> Type2_SRK_proteins_identified.fa ; done
[root@banner-web organism]# find ./Brassicaceae -name "*SCR*cds*a" | while read i ; do cat $i >> Type2_SCR_cds_identified.fa ; done
[root@banner-web organism]# find ./Brassicaceae -name "*SRK*cds*a" | while read i ; do cat $i >> Type2_SRK_cds_identified.fa ; done
:%s/>/\r>/g
:%s/\r//gc

[root@banner-web organism]# find ./Poaceae -name "*DUF*pep*a" | while read i ; do cat $i >> Type6_DUF_proteins_identified.fa ; done
[root@banner-web organism]# find ./Poaceae -name "*HPS*pep*a" | while read i ; do cat $i >> Type6_HPS_proteins_identified.fa ; done
[root@banner-web organism]# find ./Poaceae -name "*DUF*cds*a" | while read i ; do cat $i >> Type6_DUF_cds_identified.fa ; done
[root@banner-web organism]# find ./Poaceae -name "*HPS*cds*a" | while read i ; do cat $i >> Type6_HPS_cds_identified.fa ; done

