"""
[pylogparser] A Python project that provides common parser for log files. It is also connected with ElasticSearch in order to centralize the data and to provide a sophisticated RESTful API to request the data.
"""
"""
System imports.
"""
from __future__ import print_function
import os
import tempfile
"""
Pylogparser imports.
"""
import pylogparser
from pylogparser import LogParser
from pylogparser import dump_log_es
from pylogparser import load_log_es
from pylogparser import match
from pylogparser import tree
"""
First we define where to find the demonstration data.
"""
# Resolve the 'demo' folder shipped next to the installed package sources.
package_dir = os.path.dirname(pylogparser.__file__)
demodir = os.path.abspath(os.path.join(package_dir, "demo"))
"""
We create a log parser. All the parser data will be stored in the 'data'
instance parameter.
"""
parser = LogParser()
"""
We examplify here the parser object singleton property .
"""
print(parser)
for i in range(3):
p = LogParser()
print(p)
"""
We parse data from log files containing multiple processings of the same type.
"""
for basename in ("fsreconall_1.txt", "fsreconall_2.txt"):
logfile = os.path.join(demodir, basename)
parser.parse_logfile(
logfile=logfile,
job_pattern="job_\d+",
timestamp_pattern="\d{4}-\d{2}-\d{2}T\d{2}:\d{2}",
custom_patterns={
"code_in_study": {
"regex": "subjectid = \d{4}",
"splitter": (" = ", 1)
},
"cmd": {
"regex": "cmd = .*",
"splitter": (" = ", 1)
},
"exitcode": {
"regex": "exitcode = \d",
"splitter": (" = ", 1)
},
"hostname": {
"regex": "hostname = .*",
"splitter": (" = ", 1)
}
},
hierarchy={
"job_id": {
"code_in_study": {
"timestamp": {
"custom_data": None
}
}
}
},
jobs_alias="project1_freesurfer")
print("-------", basename)
tree(parser.data, level=2, display_content=False)
"""
We obtain 3 FreeSurfer records from 'fsreconall_1.txt':
------- fsreconall_1.txt
+-project1_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-11-10T01:38
And 1 more from 'fsreconall_2.txt':
------- fsreconall_2.txt
+-project1_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-12-03T17:04
| | +-2015-11-10T01:38
"""
"""
We now parse JSON struct generated from two processings.
"""
for name in ("dtifit_0001", "dtifit_0002"):
dirfiles = {
os.path.join(demodir, name, "runtime.json"): True,
os.path.join(demodir, name, "inputs.json"): False,
os.path.join(demodir, name, "outputs.json"): False
}
parser.parse_logdir(
logfiles=dirfiles,
job_name="project1_dtifit",
timestamp_key="timestamp",
hierarchy={
"job_name": {
"subjectid": {
"timestamp": {
"custom_data": None
}
}
}
},
extract_keys=["subjectid"])
print("-------", name)
tree(parser.data, level=2, display_content=False)
"""
We obtain 2 DTIFit extra records:
------- dtifit_0001
+-project1_dtifit
| +-0001
| | +-2016-07-13T09:20:00.007074
+-project1_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-12-03T17:04
| | +-2015-11-10T01:38
------- dtifit_0002
+-project1_dtifit
| +-0001
| | +-2016-07-13T09:20:00.007074
| +-0002
| | +-2016-07-13T09:16:32.993929
+-project1_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-12-03T17:04
| | +-2015-11-10T01:38
"""
"""
We show how to organize the presented parsing in a single JSON configuration
file.
"""
descfile = os.path.join(demodir, "pylogparser_demo.json")
with open(descfile, "rt") as open_file:
    jbuffer = open_file.read().replace("DEMODIR", demodir)
# Use 'delete=False' so the temporary file survives the handle being closed:
# the original code only grabbed '.name' from an auto-deleting
# NamedTemporaryFile, which removes the file as soon as the unreferenced
# object is collected — a race, and a guaranteed failure on Windows.
with tempfile.NamedTemporaryFile(
        mode="wt", suffix=".json", delete=False) as open_file:
    open_file.write(jbuffer)
    modify_descfile = open_file.name
LogParser.load(modify_descfile, verbose=0)
print("------- load 'project2' from description")
tree(parser.data, level=2, display_content=False)
"""
The same data are parsed and associated to 'project2':
------- load 'project2' from description
+-project1_dtifit
| +-0001
| | +-2016-07-13T09:20:00.007074
| +-0002
| | +-2016-07-13T09:16:32.993929
+-project2_dtifit
| +-0001
| | +-2016-07-13T09:20:00.007074
| +-0002
| | +-2016-07-13T09:16:32.993929
+-project2_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-12-03T17:04
| | +-2015-11-10T01:38
+-project1_freesurfer
| +-0001
| | +-2015-11-10T01:33
| +-0002
| | +-2015-11-10T01:35
| +-0003
| | +-2015-12-03T17:04
| | +-2015-11-10T01:38
"""
"""
We now interact with ElasticSearch and save the log parsed data.
"""
print("------- save data in elasticsearch")
dump_log_es(parser.data, "boss", "alpine", url="localhost", port=9200,
verbose=2)
"""
We now dump all the saved datain elasticsearch and check everything is all
right.
"""
data = load_log_es("boss", "alpine", url="localhost", port=9200, verbose=1)
print("------- load data from elasticsearch")
tree(parser.data, level=2, display_content=False)
record1 = data["project1_dtifit"]["0001"]["2016-07-13T09:20:00.007074"]
record2 = parser.data["project1_dtifit"]["0001"]["2016-07-13T09:20:00.007074"]
assert record1 == record2
"""
All right, now search all jobs final status.
"""
print("------- check status")
status = match(
match_name="exitcode", match_value=None, login="boss", password="alpine",
url="localhost", port=9200, index=None, doc_type=None, verbose=1)
"""
------- check status
Matches for 'exitcode=None'...
{u'project1_dtifit': {u'0001': None, u'0002': None},
u'project1_freesurfer': {u'0001': u'0', u'0002': u'0', u'0003': u'0'},
u'project2_dtifit': {u'0001': None, u'0002': None},
u'project2_freesurfer': {u'0001': u'0', u'0002': u'0', u'0003': u'0'}}
"""
"""
Focus now on a specific processing.
"""
print("------- check status of one processing")
status = match(
match_name="exitcode", match_value=None, login="boss", password="alpine",
url="localhost", port=9200, index="project1_freesurfer", doc_type=None,
verbose=1)
"""
------- check status of one processing
Matches for 'exitcode=None'...
{'project1_freesurfer': {u'0001': u'0', u'0002': u'0', u'0003': u'0'}}
"""
"""
Finally search where an error occurred during processings.
"""
print("------- check errors")
status = match(
match_name="exitcode", match_value="1", login="boss", password="alpine",
url="localhost", port=9200, index=None, doc_type=None, verbose=1)
"""
------- check errors
Matches for 'exitcode=1'...
{u'project1_freesurfer': {u'0003': u'1'},
u'project2_freesurfer': {u'0003': u'1'}}
"""