assembly_id	genome_id	genome_def	crispr_array_locus_merge	crispr_array_location_merge	crispr_locus_id	crispr_pred_method	array_in_prot	prot_within_array_20000	prot_in_genome	crispr_type_by_cas_prot	consensus_repeat	repeat_length	self-targeting_spacer_number	self-targeting_target_number	spacer_location	protospacer_location	repeat_type	spacer_locus_num	spacer_num	correct_crispr_type	genome_cas_prots	unknown_protein_around_crispr	L10	L10_domain	L9	L9_domain	L8	L8_domain	L7	L7_domain	L6	L6_domain	L5	L5_domain	L4	L4_domain	L3	L3_domain	L2	L2_domain	L1	L1_domain	R1	R1_domain	R2	R2_domain	R3	R3_domain	R4	R4_domain	R5	R5_domain	R6	R6_domain	R7	R7_domain	R8	R8_domain	R9	R9_domain	R10	R10_domain
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	1	615672-615783	1	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	CCTGGACACTTATCCCACAACTGCGAT	27	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|78aa|up_9|NZ_AP018280.1_603482_603716_+,NA|93aa|up_8|NZ_AP018280.1_603857_604136_+,NA|174aa|up_7|NZ_AP018280.1_604226_604748_+,NA|142aa|up_6|NZ_AP018280.1_604750_605176_+,NA|217aa|up_5|NZ_AP018280.1_605162_605813_+,NA|612aa|up_4|NZ_AP018280.1_605866_607702_+,NA|107aa|up_3|NZ_AP018280.1_608023_608344_-,NA|89aa|up_1|NZ_AP018280.1_611071_611338_+,NA|116aa|down_0|NZ_AP018280.1_616108_616456_+,NA|93aa|down_4|NZ_AP018280.1_621528_621807_-,NA|337aa|down_5|NZ_AP018280.1_622176_623187_+,NA|112aa|down_6|NZ_AP018280.1_623812_624148_+,NA|284aa|down_8|NZ_AP018280.1_626965_627817_+,NA|84aa|down_9|NZ_AP018280.1_628780_629032_+	NA|78aa|up_9|NZ_AP018280.1_603482_603716_+	NA	NA|93aa|up_8|NZ_AP018280.1_603857_604136_+	NA	NA|174aa|up_7|NZ_AP018280.1_604226_604748_+	NA	NA|142aa|up_6|NZ_AP018280.1_604750_605176_+	NA	NA|217aa|up_5|NZ_AP018280.1_605162_605813_+	NA	NA|612aa|up_4|NZ_AP018280.1_605866_607702_+	NA	NA|107aa|up_3|NZ_AP018280.1_608023_608344_-	NA	NA|388aa|up_2|NZ_AP018280.1_609648_610812_+	cd01189, INT_ICEBs1_C_like, C-terminal catalytic domain of integrases from bacterial phages and conjugate transposons	NA|89aa|up_1|NZ_AP018280.1_611071_611338_+	NA	NA|728aa|up_0|NZ_AP018280.1_611340_613524_-	NF033203, entero_EhxA, enterohemolysin EhxA	NA|116aa|down_0|NZ_AP018280.1_616108_616456_+	NA	NA|269aa|down_1|NZ_AP018280.1_616585_617392_+	pfam08713, DNA_alkylation, DNA alkylation repair enzyme	NA|152aa|down_2|NZ_AP018280.1_617379_617835_+	cd14503, PTP-bact, bacterial tyrosine-protein phosphataseS similar to Neisseria NMA1982	NA|125aa|down_3|NZ_AP018280.1_617895_618270_+	pfam14534, DUF4440, Domain of unknown function (DUF4440)	NA|93aa|down_4|NZ_AP018280.1_621528_621807_-	NA	NA|337aa|down_5|NZ_AP018280.1_622176_623187_+	NA	NA|112aa|down_6|NZ_AP018280.1_623812_624148_+	NA	NA|956aa|down_7|NZ_AP018280.1_624147_627015_+	TIGR02746, hypothetical_protein, type-IV secretion system protein TraC	NA|284aa|down_8|NZ_AP018280.1_626965_627817_+	NA	NA|84aa|down_9|NZ_AP018280.1_628780_629032_+	NA
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	2	2568468-2569103	1,2,1	CRT,CRISPRCasFinder,PILER-CR	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	GTTTCAATCCCTGATAGGGATTAGTTTGAATTGCAAT,CTGATAGGGATTAGTTTGAATTGCAAT,GTTTC----------AATCCCTGATAGGGATTAGTTTGAATTGCAAT	37,27,47	0	0	NA	NA	I-D,II-B:NA:I-D,II-B	8,8,7	8	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|144aa|up_7|NZ_AP018280.1_2557703_2558135_+,NA|76aa|up_0|NZ_AP018280.1_2568083_2568311_-,NA	NA|201aa|up_9|NZ_AP018280.1_2555393_2555996_-	COG1309, AcrR, Transcriptional regulator [Transcription]	NA|385aa|up_8|NZ_AP018280.1_2556118_2557273_-	COG1979, COG1979, Uncharacterized oxidoreductases, Fe-dependent alcohol dehydrogenase family [Energy production and conversion]	NA|144aa|up_7|NZ_AP018280.1_2557703_2558135_+	NA	NA|330aa|up_6|NZ_AP018280.1_2558978_2559968_+	PRK12309, PRK12309, transaldolase	NA|413aa|up_5|NZ_AP018280.1_2560405_2561644_+	PRK00180, PRK00180, acetate kinase A/propionate kinase 2; Reviewed	NA|149aa|up_4|NZ_AP018280.1_2561855_2562302_-	pfam04151, PPC, Bacterial pre-peptidase C-terminal domain	NA|425aa|up_3|NZ_AP018280.1_2562511_2563786_-	TIGR02917, TPR_domain_protein, putative PEP-CTERM system TPR-repeat lipoprotein	NA|466aa|up_2|NZ_AP018280.1_2563962_2565360_+	TIGR04074, Methyltransferase_type_12, 3' terminal RNA ribose 2'-O-methyltransferase Hen1	NA|870aa|up_1|NZ_AP018280.1_2565373_2567983_+	TIGR04075, Ser/Thr_phosphatase_family_protein, polynucleotide kinase-phosphatase	NA|76aa|up_0|NZ_AP018280.1_2568083_2568311_-	NA	NA|1241aa|down_0|NZ_AP018280.1_2569473_2573196_+	PLN03241, PLN03241, magnesium chelatase subunit H; Provisional	NA|147aa|down_1|NZ_AP018280.1_2573645_2574086_+	COG1357, COG1357, Pentapeptide repeats containing protein [Function unknown]	NA|210aa|down_2|NZ_AP018280.1_2574252_2574882_+	COG0349, Rnd, Ribonuclease D [Translation, ribosomal structure and biogenesis]	NA|152aa|down_3|NZ_AP018280.1_2575090_2575546_-	cd00158, RHOD, Rhodanese Homology Domain (RHOD); an alpha beta fold domain found duplicated in the rhodanese protein	NA|486aa|down_4|NZ_AP018280.1_2575837_2577295_-	pfam12899, Glyco_hydro_100, Alkaline and neutral invertase	NA|265aa|down_5|NZ_AP018280.1_2578258_2579053_-	cd04189, G1P_TT_long, G1P_TT_long represents the long form of glucose-1-phosphate thymidylyltransferase	NA|105aa|down_6|NZ_AP018280.1_2579360_2579675_-	COG3339, COG3339, Uncharacterized conserved protein [Function unknown]	NA|283aa|down_7|NZ_AP018280.1_2579891_2580740_+	COG0811, TolQ, Biopolymer transport proteins [Intracellular trafficking and secretion]	NA|212aa|down_8|NZ_AP018280.1_2580802_2581438_+	COG0848, ExbD, Biopolymer transport protein [Intracellular trafficking and secretion]	NA|577aa|down_9|NZ_AP018280.1_2581966_2583697_-	COG1293, COG1293, Predicted RNA-binding protein homologous to eukaryotic snRNP [Transcription]
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	3	3204371-3205120	3,2,2	CRISPRCasFinder,CRT,PILER-CR	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	GTTTCAATCCCTGATAGGGATTATTTTGAATTGCAAT,GTTTCAATCCCTGATAGGGATTATTTTGAATTGCAAT,GTTTC----------AATCCCTGATAGGGATTATTTTGAATTGCAA	37,37,46	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	10,10,9	10	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|66aa|up_9|NZ_AP018280.1_3192497_3192695_-,NA|126aa|down_0|NZ_AP018280.1_3205461_3205839_-,NA|100aa|down_4|NZ_AP018280.1_3209254_3209554_+	NA|66aa|up_9|NZ_AP018280.1_3192497_3192695_-	NA	NA|451aa|up_8|NZ_AP018280.1_3192693_3194046_+	COG1357, COG1357, Pentapeptide repeats containing protein [Function unknown]	NA|377aa|up_7|NZ_AP018280.1_3194148_3195279_+	TIGR01263, 4-hydroxyphenylpyruvate_dioxygenase, 4-hydroxyphenylpyruvate dioxygenase	NA|233aa|up_6|NZ_AP018280.1_3195489_3196188_+	COG0546, Gph, Predicted phosphatases [General function prediction only]	NA|83aa|up_5|NZ_AP018280.1_3196255_3196504_-	pfam17275, DUF5340, Family of unknown function (DUF5340)	NA|302aa|up_4|NZ_AP018280.1_3196633_3197539_-	PLN02460, PLN02460, indole-3-glycerol-phosphate synthase	NA|477aa|up_3|NZ_AP018280.1_3197705_3199136_-	PRK06416, PRK06416, dihydrolipoamide dehydrogenase; Reviewed	NA|912aa|up_2|NZ_AP018280.1_3199748_3202484_+	NF012200, choice_anch_D, choice-of-anchor D domain-containing protein	NA|197aa|up_1|NZ_AP018280.1_3202534_3203125_+	cd06260, DUF820, Domain of unknown function (DUF820)	NA|222aa|up_0|NZ_AP018280.1_3203253_3203919_-	sd00006, TPR, Tetratricopeptide repeat	NA|126aa|down_0|NZ_AP018280.1_3205461_3205839_-	NA	NA|144aa|down_1|NZ_AP018280.1_3205937_3206369_-	COG4296, COG4296, Uncharacterized protein conserved in bacteria [Function unknown]	NA|658aa|down_2|NZ_AP018280.1_3206839_3208813_-	PRK00174, PRK00174, acetyl-CoA synthetase; Provisional	NA|77aa|down_3|NZ_AP018280.1_3209052_3209283_+	TIGR02683, Uncharacterized_protein_HI_1419, putative addiction module killer protein	NA|100aa|down_4|NZ_AP018280.1_3209254_3209554_+	NA	NA|255aa|down_5|NZ_AP018280.1_3209644_3210409_-	TIGR02493, PFLA, pyruvate formate-lyase 1-activating enzyme	NA|682aa|down_6|NZ_AP018280.1_3210422_3212468_-	PRK00007, PRK00007, elongation factor G; Reviewed	NA|346aa|down_7|NZ_AP018280.1_3212662_3213700_-	cd05120, APH_ChoK_like, Aminoglycoside 3'-phosphotransferase and Choline Kinase family	NA|405aa|down_8|NZ_AP018280.1_3213795_3215010_-	COG2907, COG2907, Predicted NAD/FAD-binding protein [General function prediction only]	NA|513aa|down_9|NZ_AP018280.1_3215033_3216572_-	COG1233, COG1233, Phytoene dehydrogenase and related proteins [Secondary metabolites biosynthesis, transport, and catabolism]
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	4	3429982-3430107	4	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	GATTTTGTGCGATCGCACTTTCACCAAGAGAAATGC	36	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|99aa|up_7|NZ_AP018280.1_3416383_3416680_+,NA|106aa|up_5|NZ_AP018280.1_3417933_3418251_-,NA|560aa|up_2|NZ_AP018280.1_3421040_3422720_-,NA|449aa|up_1|NZ_AP018280.1_3423312_3424659_+,NA	NA|181aa|up_9|NZ_AP018280.1_3413866_3414409_-	COG0431, COG0431, Predicted flavoprotein [General function prediction only]	NA|506aa|up_8|NZ_AP018280.1_3414641_3416159_-	pfam08547, CIA30, Complex I intermediate-associated protein 30 (CIA30)	NA|99aa|up_7|NZ_AP018280.1_3416383_3416680_+	NA	NA|327aa|up_6|NZ_AP018280.1_3416920_3417901_+	pfam01446, Rep_1, Replication protein	NA|106aa|up_5|NZ_AP018280.1_3417933_3418251_-	NA	NA|481aa|up_4|NZ_AP018280.1_3418618_3420061_+	cd05800, PGM_like2, This PGM-like (phosphoglucomutase-like) protein of unknown function belongs to the alpha-D-phosphohexomutase superfamily and is found in both archaea and bacteria	NA|198aa|up_3|NZ_AP018280.1_3420328_3420922_+	pfam05685, Uma2, Putative restriction endonuclease	NA|560aa|up_2|NZ_AP018280.1_3421040_3422720_-	NA	NA|449aa|up_1|NZ_AP018280.1_3423312_3424659_+	NA	NA|1700aa|up_0|NZ_AP018280.1_3424809_3429909_+	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|940aa|down_0|NZ_AP018280.1_3430455_3433275_+	COG0642, BaeS, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|626aa|down_1|NZ_AP018280.1_3433248_3435126_-	PRK00331, PRK00331, isomerizing glutamine--fructose-6-phosphate transaminase	NA|82aa|down_2|NZ_AP018280.1_3435441_3435687_-	CHL00065, psaC, photosystem I subunit VII	NA|919aa|down_3|NZ_AP018280.1_3435966_3438723_-	TIGR01451, unnamed_protein_product, conserved repeat domain	NA|309aa|down_4|NZ_AP018280.1_3442985_3443912_-	pfam01345, DUF11, Domain of unknown function DUF11	NA|356aa|down_5|NZ_AP018280.1_3444852_3445920_+	COG0435, ECM4, Predicted glutathione S-transferase [Posttranslational modification, protein turnover, chaperones]	NA|294aa|down_6|NZ_AP018280.1_3446218_3447100_+	PRK13057, PRK13057, lipid kinase	NA|211aa|down_7|NZ_AP018280.1_3447294_3447927_+	COG3932, COG3932, Uncharacterized ABC-type transport system, permease components [General function prediction only]	NA|260aa|down_8|NZ_AP018280.1_3448493_3449273_-	TIGR03069, RNA-binding_S4_domain-containing_protein, photosystem II S4 domain protein	NA|184aa|down_9|NZ_AP018280.1_3449421_3449973_-	pfam13302, Acetyltransf_3, Acetyltransferase (GNAT) domain
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	5	3861631-3862463	3,5,3	PILER-CR,CRISPRCasFinder,CRT	no	WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Type I-D	ATTGCAATTCAAACTAATCCCTATTAGGGATT-----------GAAAC,ATTGCAATTCAAACTAATCCCTATTAGGGATTGAAAC,ATTGCAATTCAAACTAATCCCTATTAGGGATTGAAAC	48,37,37	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	10,10,11	11	TypeI-D	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|135aa|up_4|NZ_AP018280.1_3857886_3858291_-,NA|84aa|down_0|NZ_AP018280.1_3862567_3862819_+,NA|412aa|down_1|NZ_AP018280.1_3862890_3864126_+,NA|135aa|down_5|NZ_AP018280.1_3869384_3869789_-	cas3|704aa|up_9|NZ_AP018280.1_3850253_3852365_+	cd09710, Cas3_I-D, CRISPR/Cas system-associated protein Cas3; Distinct diverged subfamily of Cas3 helicase domain	NA|86aa|up_8|NZ_AP018280.1_3852401_3852659_+	COG2442, COG2442, Uncharacterized conserved protein [Function unknown]	cas10d|1115aa|up_7|NZ_AP018280.1_3852724_3856069_+	TIGR03174, cas_Csc3, CRISPR type I-D/CYANO-associated protein Csc3/Cas10d	csc2gr7|338aa|up_6|NZ_AP018280.1_3856133_3857147_+	pfam18320, Csc2, Csc2 Crispr	csc1gr5|236aa|up_5|NZ_AP018280.1_3857152_3857860_+	cd09711, Csc1_I-D, CRISPR/Cas system-associated protein Csc1	NA|135aa|up_4|NZ_AP018280.1_3857886_3858291_-	NA	cas6|280aa|up_3|NZ_AP018280.1_3858349_3859189_+	COG5551, COG5551, CRISPR system related protein, RAMP superfamily [Defense    mechanisms]	cas4|196aa|up_2|NZ_AP018280.1_3859290_3859878_+	TIGR00372, conserved_hypothetical_protein, CRISPR-associated protein Cas4	cas1|335aa|up_1|NZ_AP018280.1_3859953_3860958_+	TIGR04093, hypothetical_protein_L8106_25395, CRISPR-associated endonuclease Cas1, subtype CYANO	cas2|91aa|up_0|NZ_AP018280.1_3861094_3861367_+	cd09725, Cas2_I_II_III, CRISPR/Cas system-associated protein Cas2	NA|84aa|down_0|NZ_AP018280.1_3862567_3862819_+	NA	NA|412aa|down_1|NZ_AP018280.1_3862890_3864126_+	NA	NA|1048aa|down_2|NZ_AP018280.1_3864280_3867424_-	COG3641, PfoR, Predicted membrane protein, putative toxin regulator [General function prediction only]	NA|68aa|down_3|NZ_AP018280.1_3867868_3868072_+	PLN00014, PLN00014, light-harvesting-like protein 3; Provisional	NA|373aa|down_4|NZ_AP018280.1_3868220_3869339_-	COG0642, BaeS, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|135aa|down_5|NZ_AP018280.1_3869384_3869789_-	NA	NA|166aa|down_6|NZ_AP018280.1_3869789_3870287_-	cd00154, Rab, Ras-related in brain (Rab) family of small guanosine triphosphatases (GTPases)	NA|596aa|down_7|NZ_AP018280.1_3870304_3872092_-	cd07185, OmpA_C-like, Peptidoglycan binding domains similar to the C-terminal domain of outer-membrane protein OmpA	NA|201aa|down_8|NZ_AP018280.1_3872232_3872835_-	PRK12704, PRK12704, phosphodiesterase; Provisional	NA|446aa|down_9|NZ_AP018280.1_3873195_3874533_-	cd01116, P_permease, Permease P (pink-eyed dilution)
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	6	3916772-3917036	6,4,4	CRISPRCasFinder,PILER-CR,CRT	no	csa3	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Type I-A	AGTTTCAATCCCGTTGCCGGGAATCGTAAGGTTTCAAGT,GTTTCAATCCCGTTGCCGGGAATCGTAAGGTTTCAAGT,GTTTCAATCCCGTTGCCGGGAATCGTAAGGTTTCAAGT	39,38,38	0	0	NA	NA	NA:NA:NA	3,3,3	3	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|372aa|up_8|NZ_AP018280.1_3907545_3908661_+,NA|178aa|up_0|NZ_AP018280.1_3916041_3916575_+,NA|163aa|down_0|NZ_AP018280.1_3917383_3917872_-,NA|90aa|down_3|NZ_AP018280.1_3921956_3922226_+,NA|115aa|down_6|NZ_AP018280.1_3924130_3924475_-,NA|115aa|down_7|NZ_AP018280.1_3924616_3924961_-,NA|75aa|down_8|NZ_AP018280.1_3926045_3926270_-,NA|88aa|down_9|NZ_AP018280.1_3926282_3926546_-	NA|254aa|up_9|NZ_AP018280.1_3906448_3907210_+	COG5398, COG5398, Heme oxygenase [Inorganic ion transport and metabolism]	NA|372aa|up_8|NZ_AP018280.1_3907545_3908661_+	NA	NA|359aa|up_7|NZ_AP018280.1_3908988_3910065_+	PRK13654, PRK13654, magnesium-protoporphyrin IX monomethyl ester cyclase; Provisional	NA|538aa|up_6|NZ_AP018280.1_3910151_3911765_-	TIGR02026, BchE, magnesium-protoporphyrin IX monomethyl ester anaerobic oxidative cyclase	NA|238aa|up_5|NZ_AP018280.1_3912168_3912882_-	COG1357, COG1357, Pentapeptide repeats containing protein [Function unknown]	NA|157aa|up_4|NZ_AP018280.1_3913407_3913878_-	COG5500, COG5500, Predicted integral membrane protein [Function unknown]	NA|171aa|up_3|NZ_AP018280.1_3913976_3914489_-	pfam07617, DUF1579, Protein of unknown function (DUF1579)	NA|150aa|up_2|NZ_AP018280.1_3914587_3915037_-	COG2764, PhnB, Uncharacterized protein conserved in bacteria [Function unknown]	NA|193aa|up_1|NZ_AP018280.1_3915227_3915806_-	COG0262, FolA, Dihydrofolate reductase [Coenzyme metabolism]	NA|178aa|up_0|NZ_AP018280.1_3916041_3916575_+	NA	NA|163aa|down_0|NZ_AP018280.1_3917383_3917872_-	NA	NA|169aa|down_1|NZ_AP018280.1_3917947_3918454_-	COG3409, COG3409, Putative peptidoglycan-binding domain-containing protein [Cell envelope biogenesis, outer membrane]	NA|816aa|down_2|NZ_AP018280.1_3919278_3921726_-	COG5635, COG5635, Predicted NTPase (NACHT family) [Signal transduction mechanisms]	NA|90aa|down_3|NZ_AP018280.1_3921956_3922226_+	NA	NA|213aa|down_4|NZ_AP018280.1_3922459_3923098_+	pfam00300, His_Phos_1, Histidine phosphatase superfamily (branch 1)	NA|196aa|down_5|NZ_AP018280.1_3923336_3923924_-	pfam05685, Uma2, Putative restriction endonuclease	NA|115aa|down_6|NZ_AP018280.1_3924130_3924475_-	NA	NA|115aa|down_7|NZ_AP018280.1_3924616_3924961_-	NA	NA|75aa|down_8|NZ_AP018280.1_3926045_3926270_-	NA	NA|88aa|down_9|NZ_AP018280.1_3926282_3926546_-	NA
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	7	4007284-4007377	7	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	CTACCTTGCTTCCGCATAGCGTTACTACAAA	31	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|128aa|up_6|NZ_AP018280.1_3996089_3996473_+,NA|171aa|up_5|NZ_AP018280.1_3996870_3997383_+,NA|87aa|down_3|NZ_AP018280.1_4016588_4016849_-,NA|97aa|down_4|NZ_AP018280.1_4016848_4017139_-,NA|64aa|down_8|NZ_AP018280.1_4024101_4024293_-,NA|130aa|down_9|NZ_AP018280.1_4024436_4024826_-	NA|355aa|up_9|NZ_AP018280.1_3992607_3993672_-	PRK01889, PRK01889, GTPase RsgA; Reviewed	NA|288aa|up_8|NZ_AP018280.1_3994112_3994976_+	pfam01636, APH, Phosphotransferase enzyme family	NA|185aa|up_7|NZ_AP018280.1_3995098_3995653_+	COG0262, FolA, Dihydrofolate reductase [Coenzyme metabolism]	NA|128aa|up_6|NZ_AP018280.1_3996089_3996473_+	NA	NA|171aa|up_5|NZ_AP018280.1_3996870_3997383_+	NA	NA|296aa|up_4|NZ_AP018280.1_3997463_3998351_+	pfam06685, DUF1186, Protein of unknown function (DUF1186)	NA|303aa|up_3|NZ_AP018280.1_3998484_3999393_+	COG1715, Mrr, Restriction endonuclease [Defense mechanisms]	NA|964aa|up_2|NZ_AP018280.1_3999769_4002661_+	cd18011, DEXDc_RapA, DEXH-box helicase domain of RapA	NA|1296aa|up_1|NZ_AP018280.1_4002897_4006785_+	TIGR02987, m6_adenine_and_m5_cytosine_DNA_methyltransferase, type II restriction m6 adenine DNA methyltransferase, Alw26I/Eco31I/Esp3I family	NA|117aa|up_0|NZ_AP018280.1_4006885_4007236_+	PRK12275, PRK12275, hypothetical protein; Reviewed	NA|709aa|down_0|NZ_AP018280.1_4008431_4010558_+	COG0210, UvrD, Superfamily I DNA and RNA helicases [DNA replication, recombination, and repair]	NA|1823aa|down_1|NZ_AP018280.1_4010604_4016073_+	cd17923, DEXHc_Hrq1-like, DEAH-box helicase domain of Hrq1 and similar proteins	NA|72aa|down_2|NZ_AP018280.1_4016139_4016355_+	PRK06599, PRK06599, DNA topoisomerase I; Validated	NA|87aa|down_3|NZ_AP018280.1_4016588_4016849_-	NA	NA|97aa|down_4|NZ_AP018280.1_4016848_4017139_-	NA	NA|523aa|down_5|NZ_AP018280.1_4017974_4019543_-	pfam06527, TniQ, TniQ	NA|399aa|down_6|NZ_AP018280.1_4019742_4020939_-	pfam13401, AAA_22, AAA domain	NA|922aa|down_7|NZ_AP018280.1_4020938_4023704_-	pfam00665, rve, Integrase core domain	NA|64aa|down_8|NZ_AP018280.1_4024101_4024293_-	NA	NA|130aa|down_9|NZ_AP018280.1_4024436_4024826_-	NA
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	8	4659193-4659592	5,8,5	PILER-CR,CRISPRCasFinder,CRT	no	RT	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Unclear	GTTTC----------AATCCCTGATAGGGATTATTTTTAATTGCAATA,GTTTCAATCCCTGATAGGGATTATTTTTAATTGCAAT,GTTTCAATCCCTGATAGGGATTATTTTTAATTGCAATA	48,37,38	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	5,5,5	5	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|84aa|up_6|NZ_AP018280.1_4646166_4646418_+,NA|412aa|up_5|NZ_AP018280.1_4646489_4647725_+,NA|177aa|down_0|NZ_AP018280.1_4660116_4660647_+,NA|304aa|down_3|NZ_AP018280.1_4663592_4664504_+,NA|162aa|down_7|NZ_AP018280.1_4668437_4668923_-	NA|833aa|up_9|NZ_AP018280.1_4639432_4641931_+	pfam05860, Haemagg_act, haemagglutination activity domain	RT|604aa|up_8|NZ_AP018280.1_4642592_4644404_+	TIGR04416, hypothetical_protein, group II intron reverse transcriptase/maturase	NA|524aa|up_7|NZ_AP018280.1_4644515_4646087_+	pfam05860, Haemagg_act, haemagglutination activity domain	NA|84aa|up_6|NZ_AP018280.1_4646166_4646418_+	NA	NA|412aa|up_5|NZ_AP018280.1_4646489_4647725_+	NA	NA|827aa|up_4|NZ_AP018280.1_4648998_4651479_+	pfam05860, Haemagg_act, haemagglutination activity domain	NA|793aa|up_3|NZ_AP018280.1_4651632_4654011_+	pfam16734, Pilin_GH, Type IV pilin-like G and H, putative	NA|227aa|up_2|NZ_AP018280.1_4654069_4654750_+	pfam01816, LRV, Leucine rich repeat variant	NA|613aa|up_1|NZ_AP018280.1_4654662_4656501_+	pfam01816, LRV, Leucine rich repeat variant	NA|822aa|up_0|NZ_AP018280.1_4656649_4659115_+	pfam05860, Haemagg_act, haemagglutination activity domain	NA|177aa|down_0|NZ_AP018280.1_4660116_4660647_+	NA	NA|360aa|down_1|NZ_AP018280.1_4660889_4661969_-	TIGR03301, PhnW-AepZ, 2-aminoethylphosphonate aminotransferase	NA|391aa|down_2|NZ_AP018280.1_4662042_4663215_-	TIGR03297, Ppyr-DeCO2ase, phosphonopyruvate decarboxylase	NA|304aa|down_3|NZ_AP018280.1_4663592_4664504_+	NA	NA|449aa|down_4|NZ_AP018280.1_4664574_4665921_-	COG1785, PhoA, Alkaline phosphatase [Inorganic ion transport and metabolism]	NA|482aa|down_5|NZ_AP018280.1_4666130_4667576_+	COG3670, COG3670, Lignostilbene-alpha,beta-dioxygenase and related enzymes [Secondary metabolites biosynthesis, transport, and catabolism]	NA|104aa|down_6|NZ_AP018280.1_4667933_4668245_+	pfam14110, DUF4282, Domain of unknown function (DUF4282)	NA|162aa|down_7|NZ_AP018280.1_4668437_4668923_-	NA	NA|454aa|down_8|NZ_AP018280.1_4669326_4670688_+	pfam13546, DDE_5, DDE superfamily endonuclease	NA|224aa|down_9|NZ_AP018280.1_4670841_4671513_-	COG0705, COG0705, Membrane associated serine protease [Amino acid transport and metabolism]
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	9	4719463-4719565	9	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	AATCGATTAAATGTCGATCGCTCTACCAA	29	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA,NA|195aa|down_0|NZ_AP018280.1_4719895_4720480_+,NA|79aa|down_7|NZ_AP018280.1_4725562_4725799_+,NA|77aa|down_8|NZ_AP018280.1_4725872_4726103_-	NA|1711aa|up_9|NZ_AP018280.1_4700517_4705650_+	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|75aa|up_8|NZ_AP018280.1_4705805_4706030_+	pfam13619, KTSC, KTSC domain	NA|469aa|up_7|NZ_AP018280.1_4706119_4707526_-	PRK05291, trmE, tRNA uridine-5-carboxymethylaminomethyl(34) synthesis GTPase MnmE	NA|161aa|up_6|NZ_AP018280.1_4707893_4708376_+	COG4875, COG4875, Uncharacterized protein conserved in bacteria with a cystatin-like fold [Function unknown]	NA|407aa|up_5|NZ_AP018280.1_4708545_4709766_+	PLN00093, PLN00093, geranylgeranyl diphosphate reductase; Provisional	NA|361aa|up_4|NZ_AP018280.1_4710314_4711397_-	TIGR01151, Photosystem_QB_protein, photosystem II, DI subunit (also called Q(B))	NA|129aa|up_3|NZ_AP018280.1_4711750_4712137_-	cd01521, RHOD_PspE2, Member of the Rhodanese Homology Domain superfamily	NA|314aa|up_2|NZ_AP018280.1_4714049_4714991_+	pfam00535, Glycos_transf_2, Glycosyl transferase family 2	NA|578aa|up_1|NZ_AP018280.1_4715583_4717317_+	COG0433, COG0433,  HerA helicase [Replication, recombination, and repair]	NA|509aa|up_0|NZ_AP018280.1_4717352_4718879_-	cd13438, SPFH_eoslipins_u2, Uncharacterized prokaryotic subgroup of the stomatin-like proteins (slipins) family; belonging to the SPFH (stomatin, prohibitin, flotillin, and HflK/C) superfamily	NA|195aa|down_0|NZ_AP018280.1_4719895_4720480_+	NA	NA|223aa|down_1|NZ_AP018280.1_4720585_4721254_-	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|381aa|down_2|NZ_AP018280.1_4721346_4722489_+	pfam01609, DDE_Tnp_1, Transposase DDE domain	NA|208aa|down_3|NZ_AP018280.1_4722474_4723098_-	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|220aa|down_4|NZ_AP018280.1_4723209_4723869_+	PRK05986, PRK05986, cob(I)yrinic acid a,c-diamide adenosyltransferase	NA|277aa|down_5|NZ_AP018280.1_4723895_4724726_-	COG5464, COG5464, Uncharacterized conserved protein [Function unknown]	NA|213aa|down_6|NZ_AP018280.1_4724807_4725446_+	pfam00899, ThiF, ThiF family	NA|79aa|down_7|NZ_AP018280.1_4725562_4725799_+	NA	NA|77aa|down_8|NZ_AP018280.1_4725872_4726103_-	NA	NA|454aa|down_9|NZ_AP018280.1_4726555_4727917_-	cd00880, Era_like, E
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	10	4817727-4817824	10	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	GTCGTTCCTCCTCCCAATGACGGGAT	26	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|167aa|up_8|NZ_AP018280.1_4810899_4811400_+,NA|78aa|up_5|NZ_AP018280.1_4814412_4814646_+,NA|61aa|up_4|NZ_AP018280.1_4814913_4815096_+,NA|139aa|down_0|NZ_AP018280.1_4818645_4819062_-,NA|411aa|down_5|NZ_AP018280.1_4822808_4824041_+,NA|121aa|down_6|NZ_AP018280.1_4824146_4824509_-,NA|106aa|down_7|NZ_AP018280.1_4824756_4825074_+	NA|432aa|up_9|NZ_AP018280.1_4809026_4810322_-	PRK11856, PRK11856, branched-chain alpha-keto acid dehydrogenase subunit E2; Reviewed	NA|167aa|up_8|NZ_AP018280.1_4810899_4811400_+	NA	NA|149aa|up_7|NZ_AP018280.1_4811594_4812041_-	pfam11068, YlqD, YlqD protein	NA|659aa|up_6|NZ_AP018280.1_4812083_4814060_-	COG1022, FAA1, Long-chain acyl-CoA synthetases (AMP-forming) [Lipid metabolism]	NA|78aa|up_5|NZ_AP018280.1_4814412_4814646_+	NA	NA|61aa|up_4|NZ_AP018280.1_4814913_4815096_+	NA	NA|68aa|up_3|NZ_AP018280.1_4815320_4815524_+	pfam15919, HicB_lk_antitox, HicB_like antitoxin of bacterial toxin-antitoxin system	NA|75aa|up_2|NZ_AP018280.1_4815520_4815745_+	pfam07927, HicA_toxin, HicA toxin of bacterial toxin-antitoxin,	NA|85aa|up_1|NZ_AP018280.1_4815898_4816153_+	TIGR02606, Antitoxin_ParD, putative addiction module antidote protein, CC2985 family	NA|102aa|up_0|NZ_AP018280.1_4816164_4816470_+	COG3668, ParE, Plasmid stabilization system protein [General function prediction only]	NA|139aa|down_0|NZ_AP018280.1_4818645_4819062_-	NA	NA|167aa|down_1|NZ_AP018280.1_4819526_4820027_+	COG3409, COG3409, Putative peptidoglycan-binding domain-containing protein [Cell envelope biogenesis, outer membrane]	NA|219aa|down_2|NZ_AP018280.1_4820152_4820809_-	COG2197, CitB, Response regulator containing a CheY-like receiver domain and an HTH DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|152aa|down_3|NZ_AP018280.1_4820978_4821434_-	pfam07730, HisKA_3, Histidine kinase	NA|326aa|down_4|NZ_AP018280.1_4821595_4822573_-	COG4585, COG4585, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|411aa|down_5|NZ_AP018280.1_4822808_4824041_+	NA	NA|121aa|down_6|NZ_AP018280.1_4824146_4824509_-	NA	NA|106aa|down_7|NZ_AP018280.1_4824756_4825074_+	NA	NA|332aa|down_8|NZ_AP018280.1_4825308_4826304_+	pfam02574, S-methyl_trans, Homocysteine S-methyltransferase	NA|683aa|down_9|NZ_AP018280.1_4826420_4828469_+	pfam04966, OprB, Carbohydrate-selective porin, OprB family
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	11	5385395-5385521	11	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	CCAAAATTGGGGTTGGGAATCTGCTGGTTGTAGGTTGGATTGG	43	1	1	5385438-5385478	NZ_AP018274.1_1850479-1850439	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|97aa|up_7|NZ_AP018280.1_5375649_5375940_+,NA|298aa|up_3|NZ_AP018280.1_5381364_5382258_+,NA|64aa|up_2|NZ_AP018280.1_5382557_5382749_+,NA	NA|74aa|up_9|NZ_AP018280.1_5374220_5374442_+	pfam02672, CP12, CP12 domain	NA|199aa|up_8|NZ_AP018280.1_5374896_5375493_+	pfam11375, DUF3177, Protein of unknown function (DUF3177)	NA|97aa|up_7|NZ_AP018280.1_5375649_5375940_+	NA	NA|541aa|up_6|NZ_AP018280.1_5376747_5378370_+	PRK00074, guaA, GMP synthase; Reviewed	NA|140aa|up_5|NZ_AP018280.1_5378818_5379238_+	COG0723, QcrA, Rieske Fe-S protein [Energy production and conversion]	NA|567aa|up_4|NZ_AP018280.1_5379382_5381083_+	TIGR00815, Sulfate_transporter, high affinity sulphate transporter 1	NA|298aa|up_3|NZ_AP018280.1_5381364_5382258_+	NA	NA|64aa|up_2|NZ_AP018280.1_5382557_5382749_+	NA	NA|254aa|up_1|NZ_AP018280.1_5383089_5383851_-	TIGR02595, conserved_hypothetical_protein, PEP-CTERM protein-sorting domain	NA|207aa|up_0|NZ_AP018280.1_5384274_5384895_+	pfam06080, DUF938, Protein of unknown function (DUF938)	NA|181aa|down_0|NZ_AP018280.1_5386772_5387315_+	COG1791, COG1791, Uncharacterized conserved protein, contains double-stranded beta-helix domain [Function unknown]	NA|209aa|down_1|NZ_AP018280.1_5387431_5388058_+	cd07524, HAD_Pase, phosphatase, similar to Bacillus subtilis MtnX; belongs to the haloacid dehalogenase-like superfamily	NA|225aa|down_2|NZ_AP018280.1_5388116_5388791_+	TIGR03328, dehydratase-enolase-phosphatase_complex_1, methylthioribulose-1-phosphate dehydratase	NA|97aa|down_3|NZ_AP018280.1_5388855_5389146_+	COG1359, COG1359, Uncharacterized conserved protein [Function unknown]	NA|399aa|down_4|NZ_AP018280.1_5389268_5390465_+	COG1649, COG1649, Uncharacterized protein conserved in bacteria [Function unknown]	NA|244aa|down_5|NZ_AP018280.1_5390494_5391226_-	PRK00024, PRK00024, DNA repair protein RadC	NA|617aa|down_6|NZ_AP018280.1_5391965_5393816_+	PRK12305, thrS, threonyl-tRNA synthetase; Reviewed	NA|436aa|down_7|NZ_AP018280.1_5394473_5395781_+	COG1649, COG1649, Uncharacterized protein conserved in bacteria [Function unknown]	NA|362aa|down_8|NZ_AP018280.1_5396465_5397551_+	PRK09354, recA, recombinase A; Provisional	NA|529aa|down_9|NZ_AP018280.1_5397657_5399244_-	cd07488, Peptidases_S8_2, Peptidase S8 family domain, uncharacterized subfamily 2
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	12	5448361-5448464	12	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	ACAGCAGCTTGACGCTGGGCTATTTC	26	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|170aa|up_4|NZ_AP018280.1_5442263_5442773_-,NA|80aa|down_8|NZ_AP018280.1_5461489_5461729_-	NA|525aa|up_9|NZ_AP018280.1_5435774_5437349_+	pfam05729, NACHT, NACHT domain	NA|183aa|up_8|NZ_AP018280.1_5437396_5437945_+	PRK00131, aroK, shikimate kinase; Reviewed	NA|462aa|up_7|NZ_AP018280.1_5438090_5439476_-	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|449aa|up_6|NZ_AP018280.1_5439553_5440900_-	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|298aa|up_5|NZ_AP018280.1_5441074_5441968_+	cd04250, AAK_NAGK-C, AAK_NAGK-C: N-Acetyl-L-glutamate kinase - cyclic (NAGK-C) catalyzes the phosphorylation of the gamma-COOH group of N-acetyl-L-glutamate (NAG) by ATP in the second step of arginine biosynthesis found in some bacteria and photosynthetic organisms using the non-acetylated, cyclic route of ornithine biosynthesis	NA|170aa|up_4|NZ_AP018280.1_5442263_5442773_-	NA	NA|429aa|up_3|NZ_AP018280.1_5443042_5444329_-	PRK00077, eno, enolase; Provisional	NA|211aa|up_2|NZ_AP018280.1_5444607_5445240_+	pfam13353, Fer4_12, 4Fe-4S single cluster domain	NA|253aa|up_1|NZ_AP018280.1_5445401_5446160_-	cd03255, ABC_MJ0796_LolCDE_FtsE, ATP-binding cassette domain of the transporters involved in export of lipoprotein and macrolide, and cell division protein	NA|406aa|up_0|NZ_AP018280.1_5446364_5447582_-	PRK10535, PRK10535, macrolide ABC transporter ATP-binding protein/permease MacB	NA|288aa|down_0|NZ_AP018280.1_5449468_5450332_+	pfam06485, DUF1092, Protein of unknown function (DUF1092)	NA|437aa|down_1|NZ_AP018280.1_5450584_5451895_+	COG0312, TldD, Predicted Zn-dependent proteases and their inactivated homologs [General function prediction only]	NA|903aa|down_2|NZ_AP018280.1_5452445_5455154_+	cd01031, EriC, ClC chloride channel EriC	NA|339aa|down_3|NZ_AP018280.1_5455189_5456206_+	cd07025, Peptidase_S66, LD-Carboxypeptidase, a serine protease, includes microcin C7 self immunity protein	NA|636aa|down_4|NZ_AP018280.1_5456341_5458249_+	pfam05990, DUF900, Alpha/beta hydrolase of unknown function (DUF900)	NA|297aa|down_5|NZ_AP018280.1_5458330_5459221_-	COG1210, GalU, UDP-glucose pyrophosphorylase [Cell envelope biogenesis, outer membrane]	NA|358aa|down_6|NZ_AP018280.1_5459207_5460281_-	COG0153, GalK, Galactokinase [Carbohydrate transport and metabolism]	NA|296aa|down_7|NZ_AP018280.1_5460353_5461241_+	pfam13527, Acetyltransf_9, Acetyltransferase (GNAT) domain	NA|80aa|down_8|NZ_AP018280.1_5461489_5461729_-	NA	NA|583aa|down_9|NZ_AP018280.1_5461794_5463543_-	PRK13981, PRK13981, NAD synthetase; Provisional
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	13	5798648-5798730	13	CRISPRCasFinder	no		RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Orphan	AGATTAGAAGCGCAGTGGGAGGGTGATAA	29	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA,NA|169aa|down_8|NZ_AP018280.1_5811663_5812170_-,NA|892aa|down_9|NZ_AP018280.1_5812212_5814888_-	NA|354aa|up_9|NZ_AP018280.1_5785489_5786551_+	cd06321, PBP1_ABC_sugar_binding-like, periplasmic sugar-binding domain of uncharacterized ABC-type transport systems	NA|518aa|up_8|NZ_AP018280.1_5786659_5788213_+	COG1129, MglA, ABC-type sugar transport system, ATPase component [Carbohydrate transport and metabolism]	NA|333aa|up_7|NZ_AP018280.1_5788209_5789208_+	COG4158, COG4158, Predicted ABC-type sugar transport system, permease component [General function prediction only]	NA|538aa|up_6|NZ_AP018280.1_5789405_5791019_+	pfam04966, OprB, Carbohydrate-selective porin, OprB family	NA|140aa|up_5|NZ_AP018280.1_5791140_5791560_-	COG3791, COG3791, Uncharacterized conserved protein [Function unknown]	NA|124aa|up_4|NZ_AP018280.1_5791596_5791968_-	pfam11196, DUF2834, Protein of unknown function (DUF2834)	NA|285aa|up_3|NZ_AP018280.1_5792075_5792930_+	pfam17765, MLTR_LBD, MmyB-like transcription regulator ligand binding domain	NA|308aa|up_2|NZ_AP018280.1_5793148_5794072_+	pfam13359, DDE_Tnp_4, DDE superfamily endonuclease	NA|264aa|up_1|NZ_AP018280.1_5794256_5795048_-	COG0398, COG0398, Uncharacterized conserved protein [Function unknown]	NA|288aa|up_0|NZ_AP018280.1_5797081_5797945_-	COG1309, AcrR, Transcriptional regulator [Transcription]	NA|392aa|down_0|NZ_AP018280.1_5799719_5800895_+	TIGR01185, membrane_spanning_subunit, DevC protein	NA|241aa|down_1|NZ_AP018280.1_5800942_5801665_+	TIGR02982, heterocyst_DevA, ABC exporter ATP-binding subunit, DevA family	NA|479aa|down_2|NZ_AP018280.1_5801708_5803145_-	pfam03050, DDE_Tnp_IS66, Transposase IS66 family	NA|440aa|down_3|NZ_AP018280.1_5803236_5804556_-	COG1357, COG1357, Pentapeptide repeats containing protein [Function unknown]	NA|588aa|down_4|NZ_AP018280.1_5804608_5806372_+	TIGR02917, TPR_domain_protein, putative PEP-CTERM system TPR-repeat lipoprotein	NA|835aa|down_5|NZ_AP018280.1_5806466_5808971_+	cd13566, PBP2_phosphate, Substrate binding domain of putative ABC-type phosphate transporter, a member of the type 2 periplasmic binding fold superfamily	NA|132aa|down_6|NZ_AP018280.1_5809062_5809458_-	pfam14107, DUF4280, Domain of unknown function (DUF4280)	NA|654aa|down_7|NZ_AP018280.1_5809619_5811581_-	COG3501, VgrG, Uncharacterized protein conserved in bacteria [Function unknown]	NA|169aa|down_8|NZ_AP018280.1_5811663_5812170_-	NA	NA|892aa|down_9|NZ_AP018280.1_5812212_5814888_-	NA
GCF_002368375.1_ASM236837v1	NZ_AP018280	Calothrix sp. NIES-4101 DNA, complete genome	14	5991600-5991926	6,14,6	PILER-CR,CRISPRCasFinder,CRT	no	cas6	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG	Unclear	CTTGAAATTACCTTTTACCCCGCAAGGGGATTGAAACG,CTTGAAATTACCTTTTACCCCGCAAGGGGATTGAAAC,CTTGAAATTACCTTTTACCCCGCAAGGGGATTGAAACG	38,37,38	0	0	NA	NA	III-A:III-A:III-A	4,4,4	4	Unclear	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA,NA|148aa|down_2|NZ_AP018280.1_5994734_5995178_-,NA|121aa|down_7|NZ_AP018280.1_6000937_6001300_-	NA|395aa|up_9|NZ_AP018280.1_5976945_5978130_-	pfam08852, DUF1822, Protein of unknown function (DUF1822)	NA|397aa|up_8|NZ_AP018280.1_5978181_5979372_-	TIGR02937, RNA_polymerase_sigma_factor, RNA polymerase sigma factor, sigma-70 family	NA|466aa|up_7|NZ_AP018280.1_5979499_5980897_-	COG4995, COG4995, Uncharacterized protein conserved in bacteria [Function unknown]	NA|312aa|up_6|NZ_AP018280.1_5981444_5982380_-	COG1079, COG1079, Uncharacterized ABC-type transport system, permease component [General function prediction only]	NA|348aa|up_5|NZ_AP018280.1_5982449_5983493_-	COG4603, COG4603, ABC-type uncharacterized transport system, permease component [General function prediction only]	NA|499aa|up_4|NZ_AP018280.1_5983506_5985003_-	COG3845, COG3845, ABC-type uncharacterized transport systems, ATPase components [General function prediction only]	NA|351aa|up_3|NZ_AP018280.1_5985307_5986360_-	cd06304, PBP1_BmpA_Med_PnrA-like, periplasmic binding component of a family of basic membrane lipoproteins from Borrelia and various putative lipoproteins from other bacteria	NA|700aa|up_2|NZ_AP018280.1_5986516_5988616_-	COG0145, HyuA, N-methylhydantoinase A/acetone carboxylase, beta subunit [Amino acid transport and metabolism / Secondary metabolites biosynthesis, transport, and catabolism]	NA|84aa|up_1|NZ_AP018280.1_5988895_5989147_+	pfam13275, S4_2, S4 domain	cas6|379aa|up_0|NZ_AP018280.1_5990184_5991321_+	COG5551, COG5551, CRISPR system related protein, RAMP superfamily [Defense    mechanisms]	NA|227aa|down_0|NZ_AP018280.1_5992055_5992736_+	COG0569, TrkA, K+ transport systems, NAD-binding component [Inorganic ion transport and metabolism]	NA|542aa|down_1|NZ_AP018280.1_5992937_5994563_-	COG0025, NhaP, NhaP-type Na+/H+ and K+/H+ antiporters [Inorganic ion transport and metabolism]	NA|148aa|down_2|NZ_AP018280.1_5994734_5995178_-	NA	NA|82aa|down_3|NZ_AP018280.1_5995485_5995731_+	COG4118, Phd, Antitoxin of toxin-antitoxin stability system [Cell division and chromosome partitioning]	NA|133aa|down_4|NZ_AP018280.1_5995727_5996126_+	cd09872, PIN_Sll0205-like, VapC-like PIN domain of Sll0205 protein and homologs	NA|155aa|down_5|NZ_AP018280.1_5996482_5996947_-	cd06553, ASCH_Ef3133_like, ASC-1 homology domain, subfamily similar to Enterococcus faecalis Ef3133	NA|888aa|down_6|NZ_AP018280.1_5997373_6000037_-	COG3593, COG3593, Predicted ATP-dependent endonuclease of the OLD family [DNA replication, recombination, and repair]	NA|121aa|down_7|NZ_AP018280.1_6000937_6001300_-	NA	NA|304aa|down_8|NZ_AP018280.1_6001369_6002281_-	TIGR01247, drrB, daunorubicin resistance ABC transporter membrane protein	NA|340aa|down_9|NZ_AP018280.1_6002449_6003469_-	COG1131, CcmA, ABC-type multidrug transport system, ATPase component [Defense mechanisms]
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	1	1243991-1244317	1,1,1	PILER-CR,CRT,CRISPRCasFinder	no	cas6	cas3,DinG,cas6,RT	Unclear	CTTGAAATTACCTTTTACCCCGCAAGGGGATTGAAACG,CGTTTCAATCCCCTTGCGGGGTAAAAGGTAATTTCAAG,GTTTCAATCCCCTTGCGGGGTAAAAGGTAATTTCAAG	38,38,37	0	0	NA	NA	III-A:III-A:III-A	4,4,4	4	Unclear	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|121aa|up_7|NZ_AP018274.1_1234616_1234979_+,NA|148aa|up_2|NZ_AP018274.1_1240738_1241182_+,NA	NA|340aa|up_9|NZ_AP018274.1_1232447_1233467_+	COG1131, CcmA, ABC-type multidrug transport system, ATPase component [Defense mechanisms]	NA|304aa|up_8|NZ_AP018274.1_1233635_1234547_+	TIGR01247, drrB, daunorubicin resistance ABC transporter membrane protein	NA|121aa|up_7|NZ_AP018274.1_1234616_1234979_+	NA	NA|888aa|up_6|NZ_AP018274.1_1235879_1238543_+	COG3593, COG3593, Predicted ATP-dependent endonuclease of the OLD family [DNA replication, recombination, and repair]	NA|155aa|up_5|NZ_AP018274.1_1238969_1239434_+	cd06553, ASCH_Ef3133_like, ASC-1 homology domain, subfamily similar to Enterococcus faecalis Ef3133	NA|133aa|up_4|NZ_AP018274.1_1239790_1240189_-	cd09872, PIN_Sll0205-like, VapC-like PIN domain of Sll0205 protein and homologs	NA|82aa|up_3|NZ_AP018274.1_1240185_1240431_-	COG4118, Phd, Antitoxin of toxin-antitoxin stability system [Cell division and chromosome partitioning]	NA|148aa|up_2|NZ_AP018274.1_1240738_1241182_+	NA	NA|542aa|up_1|NZ_AP018274.1_1241353_1242979_+	COG0025, NhaP, NhaP-type Na+/H+ and K+/H+ antiporters [Inorganic ion transport and metabolism]	NA|227aa|up_0|NZ_AP018274.1_1243180_1243861_-	COG0569, TrkA, K+ transport systems, NAD-binding component [Inorganic ion transport and metabolism]	cas6|379aa|down_0|NZ_AP018274.1_1244595_1245732_-	COG5551, COG5551, CRISPR system related protein, RAMP superfamily [Defense    mechanisms]	NA|84aa|down_1|NZ_AP018274.1_1246769_1247021_-	pfam13275, S4_2, S4 domain	NA|700aa|down_2|NZ_AP018274.1_1247300_1249400_+	COG0145, HyuA, N-methylhydantoinase A/acetone carboxylase, beta subunit [Amino acid transport and metabolism / Secondary metabolites biosynthesis, transport, and catabolism]	NA|351aa|down_3|NZ_AP018274.1_1249556_1250609_+	cd06304, PBP1_BmpA_Med_PnrA-like, periplasmic binding component of a family of basic membrane lipoproteins from Borrelia and various putative lipoproteins from other bacteria	NA|499aa|down_4|NZ_AP018274.1_1250913_1252410_+	COG3845, COG3845, ABC-type uncharacterized transport systems, ATPase components [General function prediction only]	NA|348aa|down_5|NZ_AP018274.1_1252423_1253467_+	COG4603, COG4603, ABC-type uncharacterized transport system, permease component [General function prediction only]	NA|312aa|down_6|NZ_AP018274.1_1253536_1254472_+	COG1079, COG1079, Uncharacterized ABC-type transport system, permease component [General function prediction only]	NA|466aa|down_7|NZ_AP018274.1_1255019_1256417_+	COG4995, COG4995, Uncharacterized protein conserved in bacteria [Function unknown]	NA|397aa|down_8|NZ_AP018274.1_1256544_1257735_+	TIGR02937, RNA_polymerase_sigma_factor, RNA polymerase sigma factor, sigma-70 family	NA|395aa|down_9|NZ_AP018274.1_1257786_1258971_+	pfam08852, DUF1822, Protein of unknown function (DUF1822)
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	2	1437187-1437269	2	CRISPRCasFinder	no		cas3,DinG,cas6,RT	Orphan	TTATCCCCCTCCCACTGCGCTTCTAATCT	29	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|892aa|up_9|NZ_AP018274.1_1421028_1423704_+,NA|169aa|up_8|NZ_AP018274.1_1423746_1424253_+,NA	NA|892aa|up_9|NZ_AP018274.1_1421028_1423704_+	NA	NA|169aa|up_8|NZ_AP018274.1_1423746_1424253_+	NA	NA|654aa|up_7|NZ_AP018274.1_1424335_1426297_+	COG3501, VgrG, Uncharacterized protein conserved in bacteria [Function unknown]	NA|132aa|up_6|NZ_AP018274.1_1426458_1426854_+	pfam14107, DUF4280, Domain of unknown function (DUF4280)	NA|835aa|up_5|NZ_AP018274.1_1426945_1429450_-	cd13566, PBP2_phosphate, Substrate binding domain of putative ABC-type phosphate transporter, a member of the type 2 periplasmic binding fold superfamily	NA|588aa|up_4|NZ_AP018274.1_1429544_1431308_-	TIGR02917, TPR_domain_protein, putative PEP-CTERM system TPR-repeat lipoprotein	NA|440aa|up_3|NZ_AP018274.1_1431360_1432680_+	COG1357, COG1357, Pentapeptide repeats containing protein [Function unknown]	NA|479aa|up_2|NZ_AP018274.1_1432771_1434208_+	pfam03050, DDE_Tnp_IS66, Transposase IS66 family	NA|241aa|up_1|NZ_AP018274.1_1434251_1434974_-	TIGR02982, heterocyst_DevA, ABC exporter ATP-binding subunit, DevA family	NA|392aa|up_0|NZ_AP018274.1_1435021_1436197_-	TIGR01185, membrane_spanning_subunit, DevC protein	NA|288aa|down_0|NZ_AP018274.1_1437971_1438835_+	COG1309, AcrR, Transcriptional regulator [Transcription]	NA|264aa|down_1|NZ_AP018274.1_1440868_1441660_+	COG0398, COG0398, Uncharacterized conserved protein [Function unknown]	NA|308aa|down_2|NZ_AP018274.1_1441844_1442768_-	pfam13359, DDE_Tnp_4, DDE superfamily endonuclease	NA|285aa|down_3|NZ_AP018274.1_1442986_1443841_-	pfam17765, MLTR_LBD, MmyB-like transcription regulator ligand binding domain	NA|124aa|down_4|NZ_AP018274.1_1443948_1444320_+	pfam11196, DUF2834, Protein of unknown function (DUF2834)	NA|140aa|down_5|NZ_AP018274.1_1444356_1444776_+	COG3791, COG3791, Uncharacterized conserved protein [Function unknown]	NA|538aa|down_6|NZ_AP018274.1_1444897_1446511_-	pfam04966, OprB, Carbohydrate-selective porin, OprB family	NA|333aa|down_7|NZ_AP018274.1_1446708_1447707_-	COG4158, COG4158, Predicted ABC-type sugar transport system, permease component [General function prediction only]	NA|518aa|down_8|NZ_AP018274.1_1447703_1449257_-	COG1129, MglA, ABC-type sugar transport system, ATPase component [Carbohydrate transport and metabolism]	NA|354aa|down_9|NZ_AP018274.1_1449365_1450427_-	cd06321, PBP1_ABC_sugar_binding-like, periplasmic sugar-binding domain of uncharacterized ABC-type transport systems
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	3	1787453-1787556	3	CRISPRCasFinder	no		cas3,DinG,cas6,RT	Orphan	GAAATAGCCCAGCGTCAAGCTGCTGT	26	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|80aa|up_8|NZ_AP018274.1_1774187_1774427_+,NA|170aa|down_4|NZ_AP018274.1_1793143_1793653_+	NA|583aa|up_9|NZ_AP018274.1_1772373_1774122_+	PRK13981, PRK13981, NAD synthetase; Provisional	NA|80aa|up_8|NZ_AP018274.1_1774187_1774427_+	NA	NA|296aa|up_7|NZ_AP018274.1_1774675_1775563_-	pfam13527, Acetyltransf_9, Acetyltransferase (GNAT) domain	NA|358aa|up_6|NZ_AP018274.1_1775635_1776709_+	COG0153, GalK, Galactokinase [Carbohydrate transport and metabolism]	NA|297aa|up_5|NZ_AP018274.1_1776695_1777586_+	COG1210, GalU, UDP-glucose pyrophosphorylase [Cell envelope biogenesis, outer membrane]	NA|636aa|up_4|NZ_AP018274.1_1777667_1779575_-	pfam05990, DUF900, Alpha/beta hydrolase of unknown function (DUF900)	NA|339aa|up_3|NZ_AP018274.1_1779710_1780727_-	cd07025, Peptidase_S66, LD-Carboxypeptidase, a serine protease, includes microcin C7 self immunity protein	NA|903aa|up_2|NZ_AP018274.1_1780762_1783471_-	cd01031, EriC, ClC chloride channel EriC	NA|437aa|up_1|NZ_AP018274.1_1784021_1785332_-	COG0312, TldD, Predicted Zn-dependent proteases and their inactivated homologs [General function prediction only]	NA|288aa|up_0|NZ_AP018274.1_1785584_1786448_-	pfam06485, DUF1092, Protein of unknown function (DUF1092)	NA|406aa|down_0|NZ_AP018274.1_1788334_1789552_+	PRK10535, PRK10535, macrolide ABC transporter ATP-binding protein/permease MacB	NA|253aa|down_1|NZ_AP018274.1_1789756_1790515_+	cd03255, ABC_MJ0796_LolCDE_FtsE, ATP-binding cassette domain of the transporters involved in export of lipoprotein and macrolide, and cell division protein	NA|211aa|down_2|NZ_AP018274.1_1790676_1791309_-	pfam13353, Fer4_12, 4Fe-4S single cluster domain	NA|429aa|down_3|NZ_AP018274.1_1791587_1792874_+	PRK00077, eno, enolase; Provisional	NA|170aa|down_4|NZ_AP018274.1_1793143_1793653_+	NA	NA|298aa|down_5|NZ_AP018274.1_1793948_1794842_-	cd04250, AAK_NAGK-C, AAK_NAGK-C: N-Acetyl-L-glutamate kinase - cyclic (NAGK-C) catalyzes the phosphorylation of the gamma-COOH group of N-acetyl-L-glutamate (NAG) by ATP in the second step of arginine biosynthesis found in some bacteria and photosynthetic organisms using the non-acetylated, cyclic route of ornithine biosynthesis	NA|449aa|down_6|NZ_AP018274.1_1795016_1796363_+	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|462aa|down_7|NZ_AP018274.1_1796440_1797826_+	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|183aa|down_8|NZ_AP018274.1_1797971_1798520_-	PRK00131, aroK, shikimate kinase; Reviewed	NA|525aa|down_9|NZ_AP018274.1_1798567_1800142_-	pfam05729, NACHT, NACHT domain
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	4	2418093-2418190	4	CRISPRCasFinder	no		cas3,DinG,cas6,RT	Orphan	ATCCCGTCATTGAGAGGAGGAACGAC	26	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|106aa|up_7|NZ_AP018274.1_2410842_2411160_-,NA|121aa|up_6|NZ_AP018274.1_2411407_2411770_+,NA|411aa|up_5|NZ_AP018274.1_2411875_2413108_-,NA|139aa|up_0|NZ_AP018274.1_2416854_2417271_+,NA|61aa|down_4|NZ_AP018274.1_2420820_2421003_-,NA|78aa|down_5|NZ_AP018274.1_2421270_2421504_-,NA|167aa|down_8|NZ_AP018274.1_2424516_2425017_-	NA|683aa|up_9|NZ_AP018274.1_2407447_2409496_-	pfam04966, OprB, Carbohydrate-selective porin, OprB family	NA|332aa|up_8|NZ_AP018274.1_2409612_2410608_-	pfam02574, S-methyl_trans, Homocysteine S-methyltransferase	NA|106aa|up_7|NZ_AP018274.1_2410842_2411160_-	NA	NA|121aa|up_6|NZ_AP018274.1_2411407_2411770_+	NA	NA|411aa|up_5|NZ_AP018274.1_2411875_2413108_-	NA	NA|326aa|up_4|NZ_AP018274.1_2413343_2414321_+	COG4585, COG4585, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|152aa|up_3|NZ_AP018274.1_2414482_2414938_+	pfam07730, HisKA_3, Histidine kinase	NA|219aa|up_2|NZ_AP018274.1_2415107_2415764_+	COG2197, CitB, Response regulator containing a CheY-like receiver domain and an HTH DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|167aa|up_1|NZ_AP018274.1_2415889_2416390_-	COG3409, COG3409, Putative peptidoglycan-binding domain-containing protein [Cell envelope biogenesis, outer membrane]	NA|139aa|up_0|NZ_AP018274.1_2416854_2417271_+	NA	NA|102aa|down_0|NZ_AP018274.1_2419446_2419752_-	COG3668, ParE, Plasmid stabilization system protein [General function prediction only]	NA|85aa|down_1|NZ_AP018274.1_2419763_2420018_-	TIGR02606, Antitoxin_ParD, putative addiction module antidote protein, CC2985 family	NA|75aa|down_2|NZ_AP018274.1_2420171_2420396_-	pfam07927, HicA_toxin, HicA toxin of bacterial toxin-antitoxin,	NA|68aa|down_3|NZ_AP018274.1_2420392_2420596_-	pfam15919, HicB_lk_antitox, HicB_like antitoxin of bacterial toxin-antitoxin system	NA|61aa|down_4|NZ_AP018274.1_2420820_2421003_-	NA	NA|78aa|down_5|NZ_AP018274.1_2421270_2421504_-	NA	NA|659aa|down_6|NZ_AP018274.1_2421856_2423833_+	COG1022, FAA1, Long-chain acyl-CoA synthetases (AMP-forming) [Lipid metabolism]	NA|149aa|down_7|NZ_AP018274.1_2423875_2424322_+	pfam11068, YlqD, YlqD protein	NA|167aa|down_8|NZ_AP018274.1_2424516_2425017_-	NA	NA|432aa|down_9|NZ_AP018274.1_2425594_2426890_+	PRK11856, PRK11856, branched-chain alpha-keto acid dehydrogenase subunit E2; Reviewed
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	5	2516343-2516445	5	CRISPRCasFinder	no		cas3,DinG,cas6,RT	Orphan	TTAGCTCAGTTGGTAGAGCGATCGACATT	29	0	0	NA	NA	NA	1	1	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|77aa|up_8|NZ_AP018274.1_2509813_2510044_+,NA|79aa|up_7|NZ_AP018274.1_2510117_2510354_-,NA|195aa|up_0|NZ_AP018274.1_2515436_2516021_-,NA	NA|454aa|up_9|NZ_AP018274.1_2507999_2509361_+	cd00880, Era_like, E	NA|77aa|up_8|NZ_AP018274.1_2509813_2510044_+	NA	NA|79aa|up_7|NZ_AP018274.1_2510117_2510354_-	NA	NA|213aa|up_6|NZ_AP018274.1_2510470_2511109_-	pfam00899, ThiF, ThiF family	NA|277aa|up_5|NZ_AP018274.1_2511190_2512021_+	COG5464, COG5464, Uncharacterized conserved protein [Function unknown]	NA|220aa|up_4|NZ_AP018274.1_2512047_2512707_-	PRK05986, PRK05986, cob(I)yrinic acid a,c-diamide adenosyltransferase	NA|208aa|up_3|NZ_AP018274.1_2512818_2513442_+	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|381aa|up_2|NZ_AP018274.1_2513427_2514570_-	pfam01609, DDE_Tnp_1, Transposase DDE domain	NA|223aa|up_1|NZ_AP018274.1_2514662_2515331_+	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|195aa|up_0|NZ_AP018274.1_2515436_2516021_-	NA	NA|509aa|down_0|NZ_AP018274.1_2517037_2518564_+	cd13438, SPFH_eoslipins_u2, Uncharacterized prokaryotic subgroup of the stomatin-like proteins (slipins) family; belonging to the SPFH (stomatin, prohibitin, flotillin, and HflK/C) superfamily	NA|578aa|down_1|NZ_AP018274.1_2518599_2520333_-	COG0433, COG0433,  HerA helicase [Replication, recombination, and repair]	NA|314aa|down_2|NZ_AP018274.1_2520925_2521867_-	pfam00535, Glycos_transf_2, Glycosyl transferase family 2	NA|129aa|down_3|NZ_AP018274.1_2523779_2524166_+	cd01521, RHOD_PspE2, Member of the Rhodanese Homology Domain superfamily	NA|361aa|down_4|NZ_AP018274.1_2524519_2525602_+	TIGR01151, Photosystem_QB_protein, photosystem II, DI subunit (also called Q(B))	NA|407aa|down_5|NZ_AP018274.1_2526150_2527371_-	PLN00093, PLN00093, geranylgeranyl diphosphate reductase; Provisional	NA|161aa|down_6|NZ_AP018274.1_2527540_2528023_-	COG4875, COG4875, Uncharacterized protein conserved in bacteria with a cystatin-like fold [Function unknown]	NA|469aa|down_7|NZ_AP018274.1_2528390_2529797_+	PRK05291, trmE, tRNA uridine-5-carboxymethylaminomethyl(34) synthesis GTPase MnmE	NA|75aa|down_8|NZ_AP018274.1_2529886_2530111_-	pfam13619, KTSC, KTSC domain	NA|1711aa|down_9|NZ_AP018274.1_2530266_2535399_-	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment
GCF_002368375.1_ASM236837v1	NZ_AP018274	Calothrix sp. NIES-4101 plasmid plasmid1 DNA, complete genome	6	2576325-2576724	2,2,6	PILER-CR,CRT,CRISPRCasFinder	no	RT	cas3,DinG,cas6,RT	Unclear	TATTGCAATTAAAAATAATCCCTATCAGGGATT-----------GAAAC,TATTGCAATTAAAAATAATCCCTATCAGGGATTGAAAC,ATTGCAATTAAAAATAATCCCTATCAGGGATTGAAAC	49,38,37	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	5,5,5	5	Orphan	RT,c2c5_V-U5,csa3,Cas9_archaeal,WYL,cas3,cas10d,csc2gr7,csc1gr5,cas6,cas4,cas1,cas2,c2c9_V-U4,DinG,PD-DExK	NA|162aa|up_7|NZ_AP018274.1_2566993_2567479_+,NA|304aa|up_3|NZ_AP018274.1_2571412_2572324_-,NA|177aa|up_0|NZ_AP018274.1_2575269_2575800_-,NA|412aa|down_5|NZ_AP018274.1_2588191_2589427_-,NA|84aa|down_6|NZ_AP018274.1_2589498_2589750_-	NA|224aa|up_9|NZ_AP018274.1_2564403_2565075_+	COG0705, COG0705, Membrane associated serine protease [Amino acid transport and metabolism]	NA|454aa|up_8|NZ_AP018274.1_2565228_2566590_-	pfam13546, DDE_5, DDE superfamily endonuclease	NA|162aa|up_7|NZ_AP018274.1_2566993_2567479_+	NA	NA|104aa|up_6|NZ_AP018274.1_2567671_2567983_-	pfam14110, DUF4282, Domain of unknown function (DUF4282)	NA|482aa|up_5|NZ_AP018274.1_2568340_2569786_-	COG3670, COG3670, Lignostilbene-alpha,beta-dioxygenase and related enzymes [Secondary metabolites biosynthesis, transport, and catabolism]	NA|449aa|up_4|NZ_AP018274.1_2569995_2571342_+	COG1785, PhoA, Alkaline phosphatase [Inorganic ion transport and metabolism]	NA|304aa|up_3|NZ_AP018274.1_2571412_2572324_-	NA	NA|391aa|up_2|NZ_AP018274.1_2572701_2573874_+	TIGR03297, Ppyr-DeCO2ase, phosphonopyruvate decarboxylase	NA|360aa|up_1|NZ_AP018274.1_2573947_2575027_+	TIGR03301, PhnW-AepZ, 2-aminoethylphosphonate aminotransferase	NA|177aa|up_0|NZ_AP018274.1_2575269_2575800_-	NA	NA|822aa|down_0|NZ_AP018274.1_2576801_2579267_-	pfam05860, Haemagg_act, haemagglutination activity domain	NA|546aa|down_1|NZ_AP018274.1_2579415_2581053_-	pfam01816, LRV, Leucine rich repeat variant	NA|227aa|down_2|NZ_AP018274.1_2581166_2581847_-	pfam01816, LRV, Leucine rich repeat variant	NA|793aa|down_3|NZ_AP018274.1_2581905_2584284_-	pfam16734, Pilin_GH, Type IV pilin-like G and H, putative	NA|827aa|down_4|NZ_AP018274.1_2584437_2586918_-	pfam05860, Haemagg_act, haemagglutination activity domain	NA|412aa|down_5|NZ_AP018274.1_2588191_2589427_-	NA	NA|84aa|down_6|NZ_AP018274.1_2589498_2589750_-	NA	NA|524aa|down_7|NZ_AP018274.1_2589829_2591401_-	pfam05860, Haemagg_act, haemagglutination activity domain	RT|604aa|down_8|NZ_AP018274.1_2591512_2593324_-	TIGR04416, hypothetical_protein, group II intron reverse transcriptase/maturase	NA|833aa|down_9|NZ_AP018274.1_2593985_2596484_-	pfam05860, Haemagg_act, haemagglutination activity domain
