Skip to content
This repository was archived by the owner on Oct 4, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#PyCharm workdir
.idea

*.pyc
#Scripts output directory content
output/*
output/*.json
30 changes: 11 additions & 19 deletions IHEC_json_converter/bisulfite.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,21 @@
__author__ = 'kelley'

import json
from general import convert_to_IHEC_format

VERSION='1.6'

def bisulfite_wrapper(assembly, taxon_id):
url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=whole-genome%20shotgun%20bisulfite%20sequencing'
# Used to set is_main
BISULFATE_TRACK_HIEARCHY = {'methylation_profile': ['methylation state at CpG', 'signal']}

# Used to set is_main
track_hierarchy = {'methylation_profile': ['methylation state at CpG', 'methylation state at CHH']}

def dataset_additions_f(experiment, json_object):
def bisulfate_addition(experiment, json_object):
    """Mark a dataset record as a whole-genome bisulfite sequencing experiment.

    Sets ``experiment_type`` to ``'DNA Methylation'`` and ``assay_type`` to
    ``'WGB-Seq'`` under ``json_object['experiment_attributes']``.

    :param experiment: source experiment record; not read by this function,
        accepted so the signature matches the other ``*_addition`` callbacks.
    :param json_object: mapping that already holds an
        ``'experiment_attributes'`` mapping; it is modified in place.
    :return: the same ``json_object`` that was passed in.
    :raises KeyError: if ``json_object`` has no ``'experiment_attributes'`` key.
    """
    #Set experiment_type
    attributes = json_object['experiment_attributes']
    attributes['experiment_type'] = 'DNA Methylation'
    attributes['assay_type'] = 'WGB-Seq'

    return json_object

return json_object

return convert_to_IHEC_format(url, assembly, taxon_id, track_hierarchy, dataset_additions_f)



if __name__ == "__main__":
data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
json.dump(data, outfile, indent=4)
# if __name__ == "__main__":
# data = bisulfite_wrapper(assembly='hg19', taxon_id=9606)
# with open('../output/bisulfite_v%s.json' % VERSION, 'w+') as outfile:
# json.dump(data, outfile, indent=4)
30 changes: 11 additions & 19 deletions IHEC_json_converter/chipseq.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
__author__ = 'kelley'

import json
from general import convert_to_IHEC_format

VERSION='1.6'

def chip_seq_wrapper(assembly, taxon_id, target):
url = 'https://www.encodeproject.org/search/?type=experiment&assay_term_name=ChIP-seq&target.name=%s-human' % target

# Used to set is_main
track_hierarchy = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
CHIPSEQ_TRACK_HIEARCHY = {'peak_calls': ['optimal idr thresholded peaks', 'conservative idr thresholded peaks',
'replicated peaks', 'peaks', 'hotspots'],
'signal': ['signal p-value', 'fold change over control', 'signal', 'raw signal']}

def dataset_additions_f(experiment, json_object):

#Set experiment_type
json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']

return json_object
def chip_seq_addition(experiment, json_object):
    """Copy the ChIP-seq target label into the dataset's experiment type.

    Sets ``json_object['experiment_attributes']['experiment_type']`` to
    ``experiment['target']['label']``.

    :param experiment: experiment record holding a ``'target'`` mapping with
        a ``'label'`` entry.
    :param json_object: mapping that already holds an
        ``'experiment_attributes'`` mapping; it is modified in place.
    :return: the same ``json_object`` that was passed in.
    :raises KeyError: if ``'target'``/``'label'`` is missing from
        ``experiment`` or ``'experiment_attributes'`` from ``json_object``.
    """
    #Set experiment_type
    json_object['experiment_attributes']['experiment_type'] = experiment['target']['label']

    return json_object




if __name__ == "__main__":
targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
for t in targets:
data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
json.dump(data, outfile, indent=4)
# if __name__ == "__main__":
# targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
# for t in targets:
# data = chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
# with open('../output/%s_v%s.json' % (t, VERSION), 'w+') as outfile:
# json.dump(data, outfile, indent=4)
42 changes: 4 additions & 38 deletions IHEC_json_converter/fetch_all_exp_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import getopt
import json
from datetime import datetime
import rnaseq, bisulfite, chipseq
from reference_epigenome_experiments import collect_experiments

def main(argv):
opts, args = getopt.getopt(argv, "", ["assembly=", "taxon-id="])
Expand All @@ -22,43 +22,9 @@ def main(argv):

date_str = datetime.now().date()

#Todo: Merge experiments as a single JSON

#Whole-Genome Bisulfite Sequencing experiments
print("Processing WGB-Seq...")
try:
data = bisulfite.bisulfite_wrapper(assembly='hg19', taxon_id=9606)
filename = 'WGB-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching WGB-Seq experiments: ' + e.message)
print

#RNA-Sequencing experiments
print("Processing RNA-Seq...")
try:
data = rnaseq.rna_seq_wrapper(assembly=assembly, taxon_id=taxon_id)
filename = 'RNA-Seq_%s_%s_%s.json' % (taxon_id, assembly, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching RNA-Seq experiments: ' + e.message)
print

#ChIP-Seq experiments
targets = ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3', 'H3K9me3']
for t in targets:
print("Processing ChIP-Seq %s..." % t)
try:
data = chipseq.chip_seq_wrapper(assembly='hg19', taxon_id=9606, target=t)
filename = 'ChIP-Seq_%s_%s_%s_%s.json' % (taxon_id, assembly, t, date_str)
output_file(data, filename)
print("Done.")
except Exception as e:
print('An error occured while fetching ChIP-Seq %s experiments: ' % t + e.message)
print
print("Operation completed.")
filename = 'ENCODE.{}.{}.{}.json'.format(taxon_id, assembly, date_str)
data = collect_experiments(assembly, taxon_id)
output_file(data, filename)



Expand Down
Loading