import pandas as pd
import glob
import os
def collect_tracks(pattern, marker, label, keep_marker=False, skip=()):
    """Index the files matching ``pattern`` by their track prefix.

    The prefix is the file's basename cut at the first occurrence of
    ``marker``; with ``keep_marker`` the marker is appended back onto it.

    Args:
        pattern: glob pattern selecting the candidate files.
        marker: substring at which the basename is cut.
        label: class label for every file, or a callable that receives the
            final prefix and returns the label.
        keep_marker: re-append ``marker`` to the prefix when true.
        skip: prefixes (as cut, before any re-appending) to leave out.

    Returns:
        dict mapping prefix -> {'file', 'class', 'classname'}; a prefix seen
        twice keeps its first position and takes the values of the last file.
    """
    records = {}
    for fn in glob.glob(pattern):
        prefix = os.path.basename(fn).split(marker)[0]
        if prefix in skip:
            continue
        if keep_marker:
            prefix += marker
        track_class = label(prefix) if callable(label) else label
        records[prefix] = {
            'file': fn,
            'class': track_class,
            'classname': track_class,
        }
    return records


# One entry per input directory. Later directories overwrite earlier rows
# that share a prefix, exactly as repeated `.loc` assignment would.
track_records = {}
track_records.update(
    collect_tracks('./bigwig_ATAC_bamcoverage/*', '.e100', 'ATAC'))
track_records.update(
    collect_tracks('./bigwig_PT_DNA/*', '.e100',
                   lambda prefix: prefix.split('.')[1], skip=('files',)))
track_records.update(
    collect_tracks('./bigwig_PT_RNA_stranded/*', '.RPKM',
                   lambda prefix: 'RNA' + prefix.split('.')[1]))
track_records.update(
    collect_tracks('./mba_mcg/*', '.bw', 'CGN'))
track_records.update(
    collect_tracks('./mba_mch/*', '.CHN', 'CHN', keep_marker=True))
track_records.update(
    collect_tracks('./Subclass_chromatinstate_tracks/*', '_8states_dense',
                   'BED', keep_marker=True))
track_records.update(
    collect_tracks('./Q.10K/*', '.mcool', 'hic'))

# Build the table in one step instead of growing it one cell at a time.
all_files = pd.DataFrame(list(track_records.values()),
                         index=list(track_records))

# The cell type is everything before the first '.' of the row label.
# Forcing object dtype keeps the `.str` accessor usable when no files were found.
all_files['cell_type'] = pd.Series(
    [label.split('.')[0] for label in all_files.index],
    index=all_files.index,
    dtype=object,
)

# Discard rows whose cell type starts with four digits and an underscore.
# `.copy()` makes the result an independent frame, so the assignments below
# do not write into a slice of `all_files` (SettingWithCopyWarning).
all_files_filtered = all_files[
    ~all_files['cell_type'].str.match(r'^\d{4}_')
].copy()

# Normalise the remaining names: strip a leading numeric id, strip the
# chromatin-state suffix, and use '_' instead of '-'. The two literal
# replacements pass regex=False so they do not depend on the pandas default.
all_files_filtered['cell_type'] = (
    all_files_filtered['cell_type']
    .str.replace(r'^\d{3,4}_', '', regex=True)
    .str.replace('_8states_dense', '', regex=False)
    .str.replace('-', '_', regex=False)
)



# Cell-level annotation table: first CSV column is the index, first row the header.
df = pd.read_csv('cell_metadata_with_cluster_annotation.csv', index_col=0, header=0)

# One row per distinct (subclass, subclass_color, class) combination.
df1 = df[['subclass', 'subclass_color', 'class']].drop_duplicates()

# Re-key the table by a normalised subclass label: drop the first
# space-separated token, join the rest with '_', and map '/' and '-' to '_'.
subclass_keys = []
for subclass_name in df1['subclass']:
    joined = '_'.join(subclass_name.split(' ')[1:])
    subclass_keys.append(joined.replace('/', '_').replace('-', '_'))
df1.index = subclass_keys

# Keep only the last space-separated token of each class name.
df1['class'] = [class_name.rsplit(' ', 1)[-1] for class_name in df1['class']]

# Labels that come from stray directory entries rather than real cell types.
non_cell_type_labels = ['files', 'download', 'Q', 'media', 'db', 'log']
all_files_filtered = all_files_filtered[
    ~all_files_filtered['cell_type'].isin(non_cell_type_labels)
].copy()

# Fail with the full list of offenders instead of a KeyError on the first one.
unknown_cell_types = sorted(
    set(all_files_filtered['cell_type']) - set(df1.index)
)
if unknown_cell_types:
    raise KeyError(
        f"cell types missing from the subclass metadata: {unknown_cell_types}"
    )

# A repeated label in df1 would make each lookup return several rows;
# keep the first row per label so every cell type maps to a single value.
subclass_info = df1[~df1.index.duplicated(keep='first')]

all_files_filtered['color'] = all_files_filtered['cell_type'].map(
    subclass_info['subclass_color'])
all_files_filtered['cellclass'] = all_files_filtered['cell_type'].map(
    subclass_info['class'])
all_files_filtered['cellclassname'] = all_files_filtered['cellclass']

# NOTE(review): the original selected a 'typename' column that is never
# created, which raises KeyError. It is filled with the cell type here as a
# display name - confirm the intended value.
# The export is built as a separate table so `all_files_filtered` keeps its
# 'file' column, which the copy step further down reads.
cell_table = all_files_filtered.assign(
    typename=all_files_filtered['cell_type']
)[['cell_type', 'typename', 'color', 'class', 'classname']].drop_duplicates()

cell_table.to_csv('NewpairTag_cell.tsv', sep='\t', header=False, index=False)


import os
import shutil


def track_destination(cell_type, track_class):
    """Return the path a track is copied to, chosen by its class.

    'BED' tracks go to ./cCREs/<cell_type>.bed, 'hic' tracks to
    ./hic/<cell_type>.hic, and every other class to
    ./bigwig/<cell_type>.<class>.bw.
    """
    if track_class == 'BED':
        return f"./cCREs/{cell_type}.bed"
    if track_class == 'hic':
        return f"./hic/{cell_type}.{track_class}"
    return f"./bigwig/{cell_type}.{track_class}.bw"


# Copy every track, not only the bigwig ones: the original ran the copy
# command inside the `else` branch, so BED and hic files were never copied.
# shutil.copy also avoids interpolating unquoted paths into a shell command.
for src, cell_type, track_class in zip(all_files_filtered['file'],
                                       all_files_filtered['cell_type'],
                                       all_files_filtered['class']):
    dest = track_destination(cell_type, track_class)
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    try:
        shutil.copy(src, dest)
    except OSError as err:
        # Keep going on a failed copy, but report it instead of ignoring it.
        print(f"warning: could not copy {src} -> {dest}: {err}")
import pandas as pd
import glob,os
def build_cells_table(labels):
    """Build the per-track table from '<cell>.<class>'-style labels.

    Args:
        labels: iterable of row labels; repeated labels are kept once, in
            order of first appearance.

    Returns:
        DataFrame indexed by label with columns 'typename' (the label with
        '.' replaced by ','), 'class' and 'classname' (both the last
        '.'-separated field of the label).
    """
    unique_labels = list(dict.fromkeys(labels))
    table = pd.DataFrame(index=unique_labels)
    table['typename'] = [label.replace('.', ',') for label in unique_labels]
    # Derive the class per row. Assigning a scalar to the whole column after
    # each directory would relabel every earlier track as well.
    table['class'] = [label.split('.')[-1] for label in unique_labels]
    table['classname'] = table['class']
    return table


track_labels = []
# bigwig tracks: label is the basename up to '.bw'.
for fn in glob.glob('./bigwig/*'):
    track_labels.append(os.path.basename(fn).split('.bw')[0])
# chromatin-state tracks: '.bed' is swapped for a '.state' suffix.
for fn in glob.glob('./cCREs/*'):
    track_labels.append(os.path.basename(fn).split('.bed')[0] + '.state')
# Hi-C tracks: the full basename is the label.
for fn in glob.glob('./hic/*'):
    track_labels.append(os.path.basename(fn))

cells = build_cells_table(track_labels)

# Reload the cell annotation CSV (first column as index, first row as header).
df = pd.read_csv('cell_metadata_with_cluster_annotation.csv', index_col=0, header=0)

# Distinct (subclass, subclass_color, class) rows only.
df1 = df[['subclass', 'subclass_color', 'class']].drop_duplicates()

# Index by the subclass name with its first space-separated token removed,
# the remaining tokens joined by '_', and '/' and '-' replaced by '_'.
normalised_names = []
for raw_name in df1['subclass']:
    tail = '_'.join(raw_name.split(' ')[1:])
    normalised_names.append(tail.replace('/', '_').replace('-', '_'))
df1.index = normalised_names

# Reduce each class name to its last space-separated token.
df1['class'] = [raw_class.rsplit(' ', 1)[-1] for raw_class in df1['class']]

# Cell type of each track: the part of the row label before the first '.'.
cell_names = []
for track_label in cells.index:
    cell_names.append(track_label.split('.')[0])
cells['cells'] = cell_names

# Look up each cell type's colour in the subclass metadata.
color_by_subclass = df1['subclass_color']
cells['color'] = [color_by_subclass.loc[name] for name in cells['cells']]

# Bare expression: shows the table when run interactively.
cells