# Web scraping (Kyle, due 02/26/2021)
# BeautifulSoup for web scraping
#
# This cell lists the job directory at `base`, builds one build-log URL per
# listed run, downloads every log, and stores the result in `array_of_logs`.

import io
from urllib.request import urlopen

import dateutil
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import requests
import textblob
from bs4 import BeautifulSoup
from dateutil import parser
from google.colab import files
from nltk.stem.snowball import SnowballStemmer
from textblob import TextBlob
from textblob import Word

nltk.download('punkt')

# Disabled alternative input: read an uploaded CSV and flatten it to a list.
# uploaded = files.upload()
# df2 = pd.read_csv(io.BytesIO(uploaded['logs - Sheet1.csv']))
# array_pages = df2.to_numpy()
# array_pages = np.ndarray.tolist(array_pages)
# array_pages = [item for sublist in array_pages for item in sublist]

# URL pieces needed to build each build-log address.
website = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com"
base = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/canary-release-openshift-origin-installer-e2e-aws-4.5-cnv/"
ending = "build-log.txt"
url = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/canary-release-openshift-origin-installer-e2e-aws-4.5-cnv/1300557127638585344/build-log.txt"

# Without a timeout a stalled server would hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

page = requests.get(base, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on an HTTP error instead of scraping links out of an error page.
page.raise_for_status()
data = page.text
# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(data, "html.parser")

links = [link.get('href') for link in soup.find_all('a')]
# The first and last anchors of the listing are dropped, as before.
links = links[1:-1]

# Create the array of build-log URLs. Anchors without an href are skipped so
# that the literal text "None" never ends up inside a URL.
final_array = [website + href + ending for href in links if href]

# Pull every log and store it in a 2-d array where array_of_logs[x] is one
# build-log file and array_of_logs[x][y] is an individual log line.
array_of_logs = []
for log_url in final_array:
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(log_url, timeout=REQUEST_TIMEOUT_SECONDS) as response:
        html_bytes = response.read()
    # Decode the payload instead of calling str() on the bytes object: str()
    # yields the repr ("b'...'") with escaped quotes, tabs and newlines, which
    # corrupted the first line and every line containing a quote or tab.
    log_text = html_bytes.decode("utf-8", errors="replace")
    array_of_logs.append(log_text.splitlines())

# first log
if array_of_logs:
    print(array_of_logs[0])
else:
    print("No build logs were found under", base)


# Analysis on the log data. Trying to find a framework.
# API (Ningxiao, Parker, Tianze, Hong)
# Identify limitations with data and potential risks of achieving project goals.


# ******* Tianze *******
def ignoreWaiting(logs, marker="Waiting for setup to finish..."):
    """Blank out every log line that contains ``marker``.

    Matching lines are replaced with an empty string rather than removed, so
    the list keeps its length and the remaining lines keep their indexes.

    Args:
        logs: Mutable sequence of log-line strings. It is modified in place.
        marker: Substring identifying the lines to blank. Defaults to the
            repeated "Waiting for setup to finish..." progress message.

    Returns:
        The same ``logs`` object that was passed in, after modification.
    """
    for index, line in enumerate(logs):
        if marker in line:
            logs[index] = ""
    return logs
# ******* Tianze *******
template e2e-aws', '2020/09/26 22:21:50 Creating or restarting template instance', '2020/09/26 22:21:50 Template instance e2e-aws already deleted, do not need to wait any longer', '2020/09/26 22:21:50 Waiting for template instance to be ready', '2020/09/26 22:21:52 Running pod e2e-aws-cnv', '2020/09/26 22:21:55 Container cli in pod e2e-aws-cnv completed successfully', '2020/09/26 22:21:57 Container cli-tests in pod e2e-aws-cnv completed successfully', '2020/09/26 22:56:18 Container setup in pod e2e-aws-cnv completed successfully', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 
'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to 
finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'Waiting for setup to finish...', 'secret/support created', 'which: no docker in (/tmp/shared:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin:/go/bin)', './hack/deploy.sh', '+ source hack/common.sh', '++ set -e', '++ source hack/defaults', \"+++++ dirname \\\\'hack/defaults[0]\\\\'\", '++++ readlink -e hack/../', '+++ PROJECT_ROOT=/go/src/github.com/kubevirt/hyperconverged-cluster-operator', '+++ source /go/src/github.com/kubevirt/hyperconverged-cluster-operator/hack/config', '++++ KUBEVIRT_VERSION=v0.29.2', '++++ CDI_VERSION=v1.18.0', '++++ NETWORK_ADDONS_VERSION=0.38.0', '++++ SSP_VERSION=v1.0.35', '++++ NMO_VERSION=v0.6.0', '++++ HPPO_VERSION=v0.4.3', '++++ HPP_VERSION=v0.4.0', '++++ CONVERSION_CONTAINER_VERSION=v2.0.0', '++++ VMWARE_CONTAINER_VERSION=v2.0.0-3', '++++ VM_IMPORT_VERSION=v0.0.2', '++++ 
CONTAINER_REGISTRY=quay.io/kubevirt', '++++ mktemp -d', '+++ TEMP_DIR=/tmp/tmp.rVClBhsVd1', '+++ WAIT_TIMEOUT=450s', '+++ CDI_CONTAINER_REGISTRY=docker.io/kubevirt', '+++ KUBEVIRT_CONTAINER_REGISTRY=docker.io/kubevirt', '+++ NETWORK_ADDONS_CONTAINER_REGISTRY=quay.io/kubevirt', '+++ SSP_CONTAINER_REGISTRY=quay.io/fromani', '+++ CDI_OPERATOR_NAME=cdi-operator', '++++ basename docker.io/kubevirt', '+++ CDI_DOCKER_PREFIX=kubevirt', '+++ CONTROLLER_IMAGE=cdi-controller', '+++ IMPORTER_IMAGE=cdi-importer', '+++ CLONER_IMAGE=cdi-cloner', '+++ APISERVER_IMAGE=cdi-apiserver', '+++ UPLOADPROXY_IMAGE=cdi-uploadproxy', '+++ UPLOADSERVER_IMAGE=cdi-uploadserver', '+++ NETWORK_ADDONS_MULTUS_IMAGE=', '+++ NETWORK_ADDONS_LINUX_BRIDGE_CNI_IMAGE=', '+++ NETWORK_ADDONS_LINUX_BRIDGE_MARKER_IMAGE=', '+++ NETWORK_ADDONS_KUBEMACPOOL_IMAGE=', '+++ NETWORK_ADDONS_NMSTATE_HANDLER_IMAGE=', '+++ NETWORK_ADDONS_OVS_CNI_PLUGIN_IMAGE=', '+++ NETWORK_ADDONS_OVS_CNI_MARKER_IMAGE=', '+++ VM_IMPORT_CONTAINER_REGISTRY=quay.io/kubevirt', '+++ VM_IMPORT_IMAGE=vm-import-operator', '+++ echo docker.io/kubevirt', '+++ grep brew', \"+++ OPERATOR_MANIFESTS=\\\\'https://github.com/kubevirt/kubevirt/releases/download/v0.29.2/kubevirt-operator.yaml\", 'https://github.com/kubevirt/containerized-data-importer/releases/download/v1.18.0/cdi-operator.yaml.j2', 'https://github.com/kubevirt/cluster-network-addons-operator/releases/download/0.38.0/network-addons-config.crd.yaml', 'https://github.com/kubevirt/cluster-network-addons-operator/releases/download/0.38.0/namespace.yaml', 'https://github.com/kubevirt/cluster-network-addons-operator/releases/download/0.38.0/operator.yaml', 'https://github.com/MarSik/kubevirt-ssp-operator/releases/download/v1.0.35/kubevirt-ssp-operator-crd.yaml', 'https://github.com/MarSik/kubevirt-ssp-operator/releases/download/v1.0.35/kubevirt-ssp-operator.yaml', \"https://github.com/kubevirt/vm-import-operator/releases/download/v0.0.2/operator.yaml\\\\'\", \"+++ 
OPERATOR_CRS=\\\\'https://github.com/kubevirt/kubevirt/releases/download/v0.29.2/kubevirt-cr.yaml\", 'https://github.com/kubevirt/containerized-data-importer/releases/download/v1.18.0/cdi-operator-cr.yaml', 'https://github.com/kubevirt/cluster-network-addons-operator/releases/download/0.38.0/network-addons-config-example.cr.yaml', 'https://github.com/MarSik/kubevirt-ssp-operator/releases/download/v1.0.35/kubevirt-ssp-operator-cr.yaml', \"https://github.com/kubevirt/vm-import-operator/releases/download/v0.0.2/vmimportconfig_cr.yaml\\\\'\", '++ source cluster/kubevirtci.sh', '+++ export KUBEVIRT_PROVIDER=k8s-1.17', '+++ KUBEVIRT_PROVIDER=k8s-1.17', '+++ KUBEVIRTCI_VERSION=9d224d0c22e9ed2ca7588ccf3a258d82e160b195', '+++ KUBEVIRTCI_PATH=/go/src/github.com/kubevirt/hyperconverged-cluster-operator/_kubevirtci', '++ CDI_OPERATOR_URL=https://github.com/kubevirt/containerized-data-importer/releases/download/v1.18.0/cdi-operator.yaml', '++ KUBEVIRT_OPERATOR_URL=https://github.com/kubevirt/kubevirt/releases/download/v0.29.2/kubevirt-operator.yaml', '++ CNA_URL_PREFIX=https://github.com/kubevirt/cluster-network-addons-operator/releases/download/0.38.0', '++ mem_size=5120M', '++ num_nodes=1', '++ KUBEVIRT_PROVIDER=k8s-1.17', '++ BASE_PATH=/go/src/github.com/kubevirt/hyperconverged-cluster-operator', '+++ kubevirtci::path', '+++ echo -n /go/src/github.com/kubevirt/hyperconverged-cluster-operator/_kubevirtci', '++ KUBEVIRTCI_PATH=/go/src/github.com/kubevirt/hyperconverged-cluster-operator/_kubevirtci', '++ CMD=', '++ KUBECTL=', '++ TEST_PATH=tests/func-tests', '++ TEST_OUT_PATH=tests/func-tests/_out', '++ JOB_TYPE=prow', '++ SSP_URL_PREFIX=https://github.com/MarSik/kubevirt-ssp-operator/releases/download/v1.0.35', '++ VM_IMPORT_URL_PREFIX=https://github.com/kubevirt/vm-import-operator/releases/download/v0.0.2', '+++ which kubectl', '++ KUBECTL=/tmp/shared/kubectl', \"++ \\\\'[\\\\' -z \\\\'\\\\' \\\\']\\\\'\", \"++ \\\\'[\\\\' -z /tmp/shared/kubectl \\\\']\\\\'\", '++ 
CMD=kubectl', '+ HCO_IMAGE=quay.io/kubevirt/hyperconverged-cluster-operator:latest', '+ HCO_NAMESPACE=kubevirt-hyperconverged', '+ HCO_KIND=hyperconvergeds', '+ HCO_RESOURCE_NAME=kubevirt-hyperconverged', '+ CI=', \"+ \\\\'[\\\\' \\\\'\\\\' == CI \\\\']\\\\'\", \"+ \\\\'[\\\\' e2e-aws-cnv == hco-e2e-aws \\\\']\\\\'\", \"+ \\\\'[\\\\' e2e-aws-cnv == e2e-aws-cnv \\\\']\\\\'\", \"+ echo \\\\'deploying on AWS CI\\\\'\", 'deploying on AWS CI', '+ CI=true', '+ rm -rf _out/', '+ cp -r deploy _out/', \"+ \\\\'[\\\\' -n \\\\'registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:${component}\\\\' \\\\']\\\\'\", '+ component=hyperconverged-cluster-operator', \"++ eval echo \\\\'registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:${component}\\\\'\", '+++ echo registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:hyperconverged-cluster-operator', '+ HCO_IMAGE=registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:hyperconverged-cluster-operator', \"+ sed -i \\\\'s|image: quay.io/kubevirt/hyperconverged-cluster-operator:.*$|image: registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:hyperconverged-cluster-operator|g\\\\' _out/operator.yaml\", '+ kubectl create ns kubevirt-hyperconverged', '+ true', '+ namespaces=(\"openshift\")', \"+ for namespace in \\\\'${namespaces[@]}\\\\'\", '++ kubectl get ns openshift', '+ [[ NAME STATUS AGE', \"openshift Active 18m == \\\\'\\\\' ]]\", \"+ \\\\'[\\\\' kubectl == oc \\\\']\\\\'\", '++ kubectl config current-context', '+ kubectl config set-context admin --namespace=kubevirt-hyperconverged', 'Context \"admin\" modified.', '+ trap status EXIT', '+ CONTAINER_ERRORED=', '+ kubectl create -f _out/cluster_role.yaml', 'role.rbac.authorization.k8s.io/cluster-network-addons-operator created', 'role.rbac.authorization.k8s.io/kubevirt-operator created', 'role.rbac.authorization.k8s.io/cdi-operator created', 'role.rbac.authorization.k8s.io/hostpath-provisioner-operator created', 'clusterrole.rbac.authorization.k8s.io/hyperconverged-cluster-operator created', 
'clusterrole.rbac.authorization.k8s.io/cluster-network-addons-operator created', 'clusterrole.rbac.authorization.k8s.io/kubevirt-operator created', 'clusterrole.rbac.authorization.k8s.io/kubevirt-ssp-operator created', 'clusterrole.rbac.authorization.k8s.io/cdi-operator created', 'clusterrole.rbac.authorization.k8s.io/node-maintenance-operator created', 'clusterrole.rbac.authorization.k8s.io/hostpath-provisioner-operator created', 'clusterrole.rbac.authorization.k8s.io/vm-import-operator created', '+ kubectl create -f _out/service_account.yaml', 'serviceaccount/cdi-operator created', 'serviceaccount/cluster-network-addons-operator created', 'serviceaccount/hostpath-provisioner-operator created', 'serviceaccount/hyperconverged-cluster-operator created', 'serviceaccount/kubevirt-operator created', 'serviceaccount/kubevirt-ssp-operator created', 'serviceaccount/node-maintenance-operator created', 'serviceaccount/vm-import-operator created', '+ kubectl create -f _out/cluster_role_binding.yaml', 'rolebinding.rbac.authorization.k8s.io/cluster-network-addons-operator created', 'rolebinding.rbac.authorization.k8s.io/kubevirt-operator created', 'rolebinding.rbac.authorization.k8s.io/cdi-operator created', 'rolebinding.rbac.authorization.k8s.io/hostpath-provisioner-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/hyperconverged-cluster-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/cluster-network-addons-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/kubevirt-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/kubevirt-ssp-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/cdi-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/node-maintenance-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/hostpath-provisioner-operator created', 'clusterrolebinding.rbac.authorization.k8s.io/vm-import-operator created', '+ kubectl create -f _out/crds/', 
'customresourcedefinition.apiextensions.k8s.io/networkaddonsconfigs.networkaddonsoperator.network.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/cdis.cdi.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/hyperconvergeds.hco.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/v2vvmwares.v2v.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/ovirtproviders.v2v.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/hostpathprovisioners.hostpathprovisioner.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/kubevirts.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/nodemaintenances.nodemaintenance.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/kubevirtcommontemplatesbundles.ssp.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/kubevirtmetricsaggregations.ssp.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/kubevirtnodelabellerbundles.ssp.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/kubevirttemplatevalidators.ssp.kubevirt.io created', 'customresourcedefinition.apiextensions.k8s.io/vmimportconfigs.v2v.kubevirt.io created', \"+ \\\\'[\\\\' true \\\\'!=\\\\' true \\\\']\\\\'\", \"+ sed -E \\\\'s|^(\\\\\\\\s*)- name: KVM_EMULATION$|\\\\\\\\1- name: KVM_EMULATION\\\\\", '\\\\\\\\1 value: \"true\"|\\\\\\'', '+ cat _out/operator-ci.yaml', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: hyperconverged-cluster-operator', ' name: hyperconverged-cluster-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: hyperconverged-cluster-operator', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: hyperconverged-cluster-operator', ' spec:', ' containers:', ' - command:', ' - hyperconverged-cluster-operator', ' env:', ' - name: KVM_EMULATION', ' value: \"true\"', ' - name: OPERATOR_IMAGE', ' value: 
quay.io/kubevirt/hyperconverged-cluster-operator:1.1.0', ' - name: OPERATOR_NAME', ' value: hyperconverged-cluster-operator', ' - name: OPERATOR_NAMESPACE', ' value: kubevirt-hyperconverged', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' - name: WATCH_NAMESPACE', ' - name: CONVERSION_CONTAINER', ' value: quay.io/kubevirt/kubevirt-v2v-conversion:v2.0.0', ' - name: VMWARE_CONTAINER', ' value: quay.io/kubevirt/kubevirt-vmware:v2.0.0-3', ' - name: SMBIOS', ' value: |-', '+ kubectl create -f _out/operator-ci.yaml', ' Family: KubeVirt', ' Manufacturer: KubeVirt', ' Product: None', ' - name: MACHINETYPE', ' - name: HCO_KV_IO_VERSION', ' value: 1.1.0', ' - name: KUBEVIRT_VERSION', ' value: v0.29.2', ' - name: CDI_VERSION', ' value: v1.18.0', ' - name: NETWORK_ADDONS_VERSION', ' value: 0.38.0', ' - name: SSP_VERSION', ' value: v1.0.35', ' - name: NMO_VERSION', ' value: v0.6.0', ' - name: HPPO_VERSION', ' value: v0.4.3', ' - name: VM_IMPORT_VERSION', ' value: v0.0.2', ' image: registry.svc.ci.openshift.org/ci-op-xl3p51qp/stable:hyperconverged-cluster-operator', ' imagePullPolicy: IfNotPresent', ' name: hyperconverged-cluster-operator', ' readinessProbe:', ' exec:', ' command:', ' - stat', ' - /tmp/operator-sdk-ready', ' failureThreshold: 1', ' initialDelaySeconds: 5', ' periodSeconds: 5', ' resources: {}', ' serviceAccountName: hyperconverged-cluster-operator', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: cluster-network-addons-operator', ' name: cluster-network-addons-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: cluster-network-addons-operator', ' strategy:', ' type: Recreate', ' template:', ' metadata:', ' labels:', ' name: cluster-network-addons-operator', ' spec:', ' containers:', ' - env:', ' - name: MULTUS_IMAGE', ' value: nfvpe/multus:v3.4.1', ' - name: LINUX_BRIDGE_IMAGE', ' value: quay.io/kubevirt/cni-default-plugins:v0.8.1', ' - name: LINUX_BRIDGE_MARKER_IMAGE', ' 
value: quay.io/kubevirt/bridge-marker:0.2.0', ' - name: NMSTATE_HANDLER_IMAGE', ' value: quay.io/nmstate/kubernetes-nmstate-handler:v0.20.0', ' - name: OVS_CNI_IMAGE', ' value: quay.io/kubevirt/ovs-cni-plugin:v0.11.0', ' - name: OVS_MARKER_IMAGE', ' value: quay.io/kubevirt/ovs-cni-marker:v0.11.0', ' - name: KUBEMACPOOL_IMAGE', ' value: quay.io/kubevirt/kubemacpool:v0.14.0', ' - name: MACVTAP_CNI_IMAGE', ' value: quay.io/kubevirt/macvtap-cni:v0.2.0', ' - name: OPERATOR_IMAGE', ' value: quay.io/kubevirt/cluster-network-addons-operator:0.38.0', ' - name: OPERATOR_NAME', ' value: cluster-network-addons-operator', ' - name: OPERATOR_VERSION', ' value: 0.38.0', ' - name: OPERATOR_NAMESPACE', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.namespace', ' - name: OPERAND_NAMESPACE', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.namespace', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' - name: WATCH_NAMESPACE', ' image: quay.io/kubevirt/cluster-network-addons-operator:0.38.0', ' imagePullPolicy: IfNotPresent', ' name: cluster-network-addons-operator', ' resources: {}', ' serviceAccountName: cluster-network-addons-operator', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: virt-operator', ' name: virt-operator', 'spec:', ' replicas: 2', ' selector:', ' matchLabels:', ' kubevirt.io: virt-operator', ' strategy:', ' type: RollingUpdate', ' template:', ' metadata:', ' annotations:', ' scheduler.alpha.kubernetes.io/critical-pod: \"\"', ' labels:', ' kubevirt.io: virt-operator', ' prometheus.kubevirt.io: \"\"', ' name: virt-operator', ' spec:', ' affinity:', ' podAntiAffinity:', ' preferredDuringSchedulingIgnoredDuringExecution:', ' - podAffinityTerm:', ' labelSelector:', ' matchExpressions:', ' - key: kubevirt.io', ' operator: In', ' values:', ' - virt-operator', ' topologyKey: kubernetes.io/hostname', ' weight: 1', ' containers:', ' - command:', ' - virt-operator', ' - --port', ' - \"8443\"', ' - -v', ' 
- \"2\"', ' env:', ' - name: OPERATOR_IMAGE', ' value: docker.io/kubevirt/virt-operator:v0.29.2', ' - name: WATCH_NAMESPACE', ' valueFrom:', ' fieldRef:', \" fieldPath: metadata.annotations[\\\\'olm.targetNamespaces\\\\']\", ' image: docker.io/kubevirt/virt-operator:v0.29.2', ' imagePullPolicy: IfNotPresent', ' name: virt-operator', ' ports:', ' - containerPort: 8443', ' name: metrics', ' protocol: TCP', ' - containerPort: 8444', ' name: webhooks', ' protocol: TCP', ' readinessProbe:', ' httpGet:', ' path: /metrics', ' port: 8443', ' scheme: HTTPS', ' initialDelaySeconds: 5', ' timeoutSeconds: 10', ' resources: {}', ' volumeMounts:', ' - mountPath: /etc/virt-operator/certificates', ' name: kubevirt-operator-certs', ' readOnly: true', ' priorityClassName: kubevirt-cluster-critical', ' securityContext:', ' runAsNonRoot: true', ' serviceAccountName: kubevirt-operator', ' tolerations:', ' - key: CriticalAddonsOnly', ' operator: Exists', ' volumes:', ' - name: kubevirt-operator-certs', ' secret:', ' optional: true', ' secretName: kubevirt-operator-certs', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: kubevirt-ssp-operator', ' name: kubevirt-ssp-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: kubevirt-ssp-operator', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: kubevirt-ssp-operator', ' spec:', ' containers:', ' - env:', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' - name: IMAGE_REFERENCE', ' value: quay.io/fromani/kubevirt-ssp-operator-container:v1.0.35', ' - name: WATCH_NAMESPACE', ' - name: KVM_INFO_TAG', ' - name: VALIDATOR_TAG', ' - name: VIRT_LAUNCHER_TAG', ' - name: NODE_LABELLER_TAG', ' - name: CPU_PLUGIN_TAG', ' - name: IMAGE_NAME_PREFIX', ' - name: OPERATOR_NAME', ' value: kubevirt-ssp-operator', ' image: quay.io/fromani/kubevirt-ssp-operator-container:v1.0.35', ' imagePullPolicy: Always', ' name: kubevirt-ssp-operator', ' ports:', ' - 
containerPort: 60000', ' name: metrics', ' resources: {}', ' serviceAccountName: kubevirt-ssp-operator', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: cdi-operator', ' name: cdi-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: cdi-operator', ' operator.cdi.kubevirt.io: \"\"', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: cdi-operator', ' operator.cdi.kubevirt.io: \"\"', ' spec:', ' containers:', ' - env:', ' - name: DEPLOY_CLUSTER_RESOURCES', ' value: \"true\"', ' - name: OPERATOR_VERSION', ' value: v1.18.0', ' - name: CONTROLLER_IMAGE', ' value: docker.io/kubevirt/cdi-controller:v1.18.0', ' - name: IMPORTER_IMAGE', ' value: docker.io/kubevirt/cdi-importer:v1.18.0', ' - name: CLONER_IMAGE', ' value: docker.io/kubevirt/cdi-cloner:v1.18.0', ' - name: APISERVER_IMAGE', ' value: docker.io/kubevirt/cdi-apiserver:v1.18.0', ' - name: UPLOAD_SERVER_IMAGE', ' value: docker.io/kubevirt/cdi-uploadserver:v1.18.0', ' - name: UPLOAD_PROXY_IMAGE', ' value: docker.io/kubevirt/cdi-uploadproxy:v1.18.0', ' - name: VERBOSITY', ' value: \"1\"', ' - name: PULL_POLICY', ' value: IfNotPresent', ' image: docker.io/kubevirt/cdi-operator:v1.18.0', ' imagePullPolicy: IfNotPresent', ' name: cdi-operator', ' ports:', ' - containerPort: 60000', ' name: metrics', ' protocol: TCP', ' resources: {}', ' securityContext:', ' runAsNonRoot: true', ' serviceAccountName: cdi-operator', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: node-maintenance-operator', ' name: node-maintenance-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: node-maintenance-operator', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: node-maintenance-operator', ' spec:', ' affinity:', ' nodeAffinity:', ' requiredDuringSchedulingIgnoredDuringExecution:', ' nodeSelectorTerms:', ' - matchExpressions:', ' - key: node-role.kubernetes.io/master', ' operator: Exists', ' 
containers:', ' - env:', ' - name: WATCH_NAMESPACE', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.namespace', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' - name: OPERATOR_NAME', ' value: node-maintenance-operator', ' image: quay.io/kubevirt/node-maintenance-operator:v0.6.0', ' imagePullPolicy: Always', ' name: node-maintenance-operator', ' resources: {}', ' serviceAccountName: node-maintenance-operator', ' tolerations:', ' - effect: NoSchedule', ' key: node-role.kubernetes.io/master', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: hostpath-provisioner-operator', ' name: hostpath-provisioner-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: hostpath-provisioner-operator', ' operator.hostpath-provisioner.kubevirt.io: \"\"', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: hostpath-provisioner-operator', ' operator.hostpath-provisioner.kubevirt.io: \"\"', ' spec:', ' containers:', ' - env:', ' - name: WATCH_NAMESPACE', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.namespace', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' - name: OPERATOR_NAME', ' value: hostpath-provisioner-operator', ' - name: PROVISIONER_IMAGE', ' value: quay.io/kubevirt/hostpath-provisioner:v0.4.0', ' - name: PULL_POLICY', ' value: IfNotPresent', ' image: quay.io/kubevirt/hostpath-provisioner-operator:v0.4.3', ' imagePullPolicy: IfNotPresent', ' name: hostpath-provisioner-operator', ' resources: {}', ' serviceAccountName: hostpath-provisioner-operator', '---', 'apiVersion: apps/v1', 'kind: Deployment', 'metadata:', ' labels:', ' name: vm-import-operator', ' name: vm-import-operator', 'spec:', ' replicas: 1', ' selector:', ' matchLabels:', ' name: vm-import-operator', ' operator.v2v.kubevirt.io: \"\"', ' strategy: {}', ' template:', ' metadata:', ' labels:', ' name: vm-import-operator', ' operator.v2v.kubevirt.io: \"\"', ' spec:', ' 
containers:', ' - env:', ' - name: DEPLOY_CLUSTER_RESOURCES', ' value: \"true\"', ' - name: OPERATOR_VERSION', ' value: v0.0.2', ' - name: CONTROLLER_IMAGE', ' value: quay.io/kubevirt/vm-import-controller:v0.0.2', ' - name: PULL_POLICY', ' value: IfNotPresent', ' - name: WATCH_NAMESPACE', ' - name: POD_NAME', ' valueFrom:', ' fieldRef:', ' fieldPath: metadata.name', ' image: quay.io/kubevirt/vm-import-operator:v0.0.2', ' imagePullPolicy: IfNotPresent', ' name: vm-import-operator', ' resources: {}', ' securityContext:', ' runAsNonRoot: true', ' serviceAccountName: vm-import-operator', 'deployment.apps/hyperconverged-cluster-operator created', 'deployment.apps/cluster-network-addons-operator created', 'deployment.apps/virt-operator created', 'deployment.apps/kubevirt-ssp-operator created', 'deployment.apps/cdi-operator created', 'deployment.apps/node-maintenance-operator created', 'deployment.apps/hostpath-provisioner-operator created', 'deployment.apps/vm-import-operator created', '+ sleep 20', '+ kubectl wait deployment/hyperconverged-cluster-operator --for=condition=Available --timeout=1080s', 'deployment.apps/hyperconverged-cluster-operator condition met', '+ for op in cdi-operator cluster-network-addons-operator kubevirt-ssp-operator node-maintenance-operator vm-import-operator', '+ kubectl wait deployment/cdi-operator --for=condition=Available --timeout=540s', 'deployment.apps/cdi-operator condition met', '+ for op in cdi-operator cluster-network-addons-operator kubevirt-ssp-operator node-maintenance-operator vm-import-operator', '+ kubectl wait deployment/cluster-network-addons-operator --for=condition=Available --timeout=540s', 'deployment.apps/cluster-network-addons-operator condition met', '+ for op in cdi-operator cluster-network-addons-operator kubevirt-ssp-operator node-maintenance-operator vm-import-operator', '+ kubectl wait deployment/kubevirt-ssp-operator --for=condition=Available --timeout=540s', 'deployment.apps/kubevirt-ssp-operator condition 
met', '+ for op in cdi-operator cluster-network-addons-operator kubevirt-ssp-operator node-maintenance-operator vm-import-operator', '+ kubectl wait deployment/node-maintenance-operator --for=condition=Available --timeout=540s', 'deployment.apps/node-maintenance-operator condition met', '+ for op in cdi-operator cluster-network-addons-operator kubevirt-ssp-operator node-maintenance-operator vm-import-operator', '+ kubectl wait deployment/vm-import-operator --for=condition=Available --timeout=540s', 'deployment.apps/vm-import-operator condition met', '+ kubectl create -f _out/hco.cr.yaml', 'hyperconverged.hco.kubevirt.io/kubevirt-hyperconverged created', '+ sleep 10', '+ timeout 30m bash -c -- \\\\\\'until kubectl get -n kubevirt-hyperconverged hyperconvergeds kubevirt-hyperconverged -o go-template=\\\\\\'\\\\\\\\\\\\\\'\\\\\\'{{ range .status.conditions }}{{ if eq .type \"Available\" }}{{ .status }}{{ end }}{{ end }}\\\\\\'\\\\\\\\\\\\\\'\\\\\\' | grep True; do sleep 1; done\\\\\\'', 'True', '+ kubectl get -n kubevirt-hyperconverged hyperconvergeds kubevirt-hyperconverged -o \\\\\\'go-template={{ range .status.conditions }}{{ .type }}{{ \"\\\\\\\\t\" }}{{ .status }}{{ \"\\\\\\\\t\" }}{{ .message }}{{ \"\\\\', '\" }}{{ end }}\\\\\\'', 'ReconcileComplete\\\\tTrue\\\\tReconcile completed successfully', 'Available\\\\tTrue\\\\tReconcile completed successfully', 'Progressing\\\\tFalse\\\\tReconcile completed successfully', 'Degraded\\\\tFalse\\\\tReconcile completed successfully', 'Upgradeable\\\\tTrue\\\\tReconcile completed successfully', '+ for dep in cdi-apiserver cdi-deployment cdi-uploadproxy virt-api virt-controller', '+ kubectl wait deployment/cdi-apiserver --for=condition=Available --timeout=360s', 'deployment.apps/cdi-apiserver condition met', '+ for dep in cdi-apiserver cdi-deployment cdi-uploadproxy virt-api virt-controller', '+ kubectl wait deployment/cdi-deployment --for=condition=Available --timeout=360s', 'deployment.apps/cdi-deployment condition met', 
'+ for dep in cdi-apiserver cdi-deployment cdi-uploadproxy virt-api virt-controller', '+ kubectl wait deployment/cdi-uploadproxy --for=condition=Available --timeout=360s', 'deployment.apps/cdi-uploadproxy condition met', '+ for dep in cdi-apiserver cdi-deployment cdi-uploadproxy virt-api virt-controller', '+ kubectl wait deployment/virt-api --for=condition=Available --timeout=360s', 'deployment.apps/virt-api condition met', '+ for dep in cdi-apiserver cdi-deployment cdi-uploadproxy virt-api virt-controller', '+ kubectl wait deployment/virt-controller --for=condition=Available --timeout=360s', 'deployment.apps/virt-controller condition met', 'SUCCESS', \"+ \\\\'[\\\\' -z \\\\'\\\\' \\\\']\\\\'\", '+ echo SUCCESS', '+ exit 0', '+ status', '+ kubectl get hco -n kubevirt-hyperconverged -o yaml', 'apiVersion: v1', 'items:', '- apiVersion: hco.kubevirt.io/v1alpha1', ' kind: HyperConverged', ' metadata:', ' creationTimestamp: \"2020-09-26T22:56:57Z\"', ' finalizers:', ' - hyperconvergeds.hco.kubevirt.io', ' generation: 1', ' managedFields:', ' - apiVersion: hco.kubevirt.io/v1alpha1', ' fieldsType: FieldsV1', ' fieldsV1:', ' f:spec: {}', ' manager: kubectl', ' operation: Update', ' time: \"2020-09-26T22:56:57Z\"', ' - apiVersion: hco.kubevirt.io/v1alpha1', ' fieldsType: FieldsV1', ' fieldsV1:', ' f:metadata:', ' f:finalizers:', ' .: {}', ' v:\"hyperconvergeds.hco.kubevirt.io\": {}', ' f:status:', ' .: {}', ' f:conditions: {}', ' f:relatedObjects: {}', ' f:versions: {}', ' manager: hyperconverged-cluster-operator', ' operation: Update', ' time: \"2020-09-26T22:59:26Z\"', ' name: kubevirt-hyperconverged', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"26759\"', ' selfLink: /apis/hco.kubevirt.io/v1alpha1/namespaces/kubevirt-hyperconverged/hyperconvergeds/kubevirt-hyperconverged', ' uid: e2f29e5a-53b2-450e-8c76-656277cd887a', ' spec: {}', ' status:', ' conditions:', ' - lastHeartbeatTime: \"2020-09-26T22:59:26Z\"', ' lastTransitionTime: \"2020-09-26T22:56:58Z\"', 
' message: Reconcile completed successfully', ' reason: ReconcileCompleted', ' status: \"True\"', ' type: ReconcileComplete', ' - lastHeartbeatTime: \"2020-09-26T22:59:26Z\"', ' lastTransitionTime: \"2020-09-26T22:59:26Z\"', ' message: Reconcile completed successfully', ' reason: ReconcileCompleted', ' status: \"True\"', ' type: Available', ' - lastHeartbeatTime: \"2020-09-26T22:59:26Z\"', ' lastTransitionTime: \"2020-09-26T22:59:26Z\"', ' message: Reconcile completed successfully', ' reason: ReconcileCompleted', ' status: \"False\"', ' type: Progressing', ' - lastHeartbeatTime: \"2020-09-26T22:59:26Z\"', ' lastTransitionTime: \"2020-09-26T22:58:11Z\"', ' message: Reconcile completed successfully', ' reason: ReconcileCompleted', ' status: \"False\"', ' type: Degraded', ' - lastHeartbeatTime: \"2020-09-26T22:59:26Z\"', ' lastTransitionTime: \"2020-09-26T22:59:26Z\"', ' message: Reconcile completed successfully', ' reason: ReconcileCompleted', ' status: \"True\"', ' type: Upgradeable', ' relatedObjects:', ' - apiVersion: scheduling.k8s.io/v1', ' kind: PriorityClass', ' name: kubevirt-cluster-critical', ' resourceVersion: \"23988\"', ' uid: bc0c51a9-81e3-4caa-8d00-b684c22012ba', ' - apiVersion: v1', ' kind: ConfigMap', ' name: kubevirt-config', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"23989\"', ' uid: 01231030-c0d7-4191-a19c-25ba0e8cc74e', ' - apiVersion: v1', ' kind: ConfigMap', ' name: kubevirt-storage-class-defaults', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"23990\"', ' uid: 37aae361-4d68-4629-9d53-7aabb218e263', ' - apiVersion: kubevirt.io/v1alpha3', ' kind: KubeVirt', ' name: kubevirt-kubevirt-hyperconverged', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"26756\"', ' uid: e852e27d-7c0f-4395-8c74-139e1280e401', ' - apiVersion: cdi.kubevirt.io/v1alpha1', ' kind: CDI', ' name: cdi-kubevirt-hyperconverged', ' resourceVersion: \"25550\"', ' uid: a3376ac7-d538-4b4a-b0da-1ab8eee40888', ' - apiVersion: 
networkaddonsoperator.network.kubevirt.io/v1alpha1', ' kind: NetworkAddonsConfig', ' name: cluster', ' resourceVersion: \"26566\"', ' uid: e4e358ab-3813-41fc-9dda-740470afd5d7', ' - apiVersion: ssp.kubevirt.io/v1', ' kind: KubevirtCommonTemplatesBundle', ' name: common-templates-kubevirt-hyperconverged', ' namespace: openshift', ' resourceVersion: \"26675\"', ' uid: 443fb1b8-fe95-4eb6-95d2-f4dacb1d1da8', ' - apiVersion: ssp.kubevirt.io/v1', ' kind: KubevirtNodeLabellerBundle', ' name: node-labeller-kubevirt-hyperconverged', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"25257\"', ' uid: 40479c0a-9f75-427d-864a-5778a95c5b36', ' - apiVersion: ssp.kubevirt.io/v1', ' kind: KubevirtTemplateValidator', ' name: template-validator-kubevirt-hyperconverged', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"25198\"', ' uid: d530b77f-8263-40e9-8c65-5b1935def74d', ' - apiVersion: ssp.kubevirt.io/v1', ' kind: KubevirtMetricsAggregation', ' name: metrics-aggregation-kubevirt-hyperconverged', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"24846\"', ' uid: 85399c1d-e618-4979-9831-107bbe88584c', ' - apiVersion: v1', ' kind: ConfigMap', ' name: v2v-vmware', ' namespace: kubevirt-hyperconverged', ' resourceVersion: \"23999\"', ' uid: 2d9695c3-4571-4529-a148-b91b9ec1b099', ' - apiVersion: v2v.kubevirt.io/v1alpha1', ' kind: VMImportConfig', ' name: vmimport-kubevirt-hyperconverged', ' resourceVersion: \"25761\"', ' uid: bebc5dde-e31e-40d7-b8a4-2af7858e1256', ' versions:', ' - name: operator', ' version: 1.1.0', 'kind: List', 'metadata:', ' resourceVersion: \"\"', ' selfLink: \"\"', '+ kubectl get pods -n kubevirt-hyperconverged', 'NAME READY STATUS RESTARTS AGE', 'bridge-marker-5cgj2 1/1 Running 0 2m41s', 'bridge-marker-6gxrq 1/1 Running 0 2m41s', 'bridge-marker-6l9lg 1/1 Running 0 2m41s', 'bridge-marker-6ttjz 1/1 Running 0 2m41s', 'bridge-marker-bn8vh 1/1 Running 0 2m41s', 'bridge-marker-gp95h 1/1 Running 0 2m41s', 'cdi-apiserver-66c5fb4595-jz8jz 
1/1 Running 0 2m45s', 'cdi-deployment-7f6c498dcf-fk4ff 1/1 Running 0 2m46s', 'cdi-operator-748565fbd8-l7z5n 1/1 Running 0 3m12s', 'cdi-uploadproxy-748db7f7c4-kgtvl 1/1 Running 0 2m46s', 'cluster-network-addons-operator-85ff784c67-ml5d2 1/1 Running 0 3m12s', 'hostpath-provisioner-operator-6bbf5ffd9c-vx2k8 1/1 Running 0 3m11s', 'hyperconverged-cluster-operator-59fd8f5765-m5sgp 1/1 Running 0 3m12s', 'kube-cni-linux-bridge-plugin-46j5b 1/1 Running 0 2m41s', 'kube-cni-linux-bridge-plugin-4gmts 1/1 Running 0 2m41s', 'kube-cni-linux-bridge-plugin-96jpq 1/1 Running 0 2m41s', 'kube-cni-linux-bridge-plugin-vlgg2 1/1 Running 0 2m41s', 'kube-cni-linux-bridge-plugin-wjvvb 1/1 Running 0 2m41s', 'kube-cni-linux-bridge-plugin-wvj4q 1/1 Running 0 2m41s', 'kubemacpool-mac-controller-manager-76596b6f8f-njl5v 1/1 Running 0 2m42s', 'kubemacpool-mac-controller-manager-76596b6f8f-pqzvz 0/1 Running 0 2m42s', 'kubevirt-node-labeller-q2dhw 1/1 Running 0 2m4s', 'kubevirt-node-labeller-qpgj8 1/1 Running 0 2m4s', 'kubevirt-node-labeller-xsjz5 1/1 Running 0 2m4s', 'kubevirt-ssp-operator-55d9699848-hkn85 1/1 Running 0 3m12s', 'nmstate-handler-4qssj 1/1 Running 0 2m40s', 'nmstate-handler-kvwdl 1/1 Running 0 2m40s', 'nmstate-handler-l5dcp 1/1 Running 0 2m40s', 'nmstate-handler-s48bq 1/1 Running 0 2m40s', 'nmstate-handler-z54nk 1/1 Running 0 2m40s', 'nmstate-handler-zq9bl 1/1 Running 0 2m40s', 'node-maintenance-operator-68fb7b4889-dcfdn 1/1 Running 0 3m11s', 'ovs-cni-amd64-6rmvn 2/2 Running 0 2m40s', 'ovs-cni-amd64-8z6x2 2/2 Running 0 2m40s', 'ovs-cni-amd64-f5jw2 2/2 Running 0 2m40s', 'ovs-cni-amd64-f96kj 2/2 Running 0 2m40s', 'ovs-cni-amd64-rb8wd 2/2 Running 0 2m40s', 'ovs-cni-amd64-vn2xn 2/2 Running 0 2m40s', 'virt-api-7cf789bcb8-2b85v 1/1 Running 0 106s', 'virt-api-7cf789bcb8-dm4qg 1/1 Running 0 106s', 'virt-controller-7557668d6b-95kz4 1/1 Running 0 70s', 'virt-controller-7557668d6b-h2trd 1/1 Running 0 70s', 'virt-handler-495rk 1/1 Running 0 70s', 'virt-handler-6gptn 1/1 Running 0 70s', 
'virt-handler-85wgv 1/1 Running 0 70s', 'virt-operator-6cbcb47b78-9v4tf 1/1 Running 0 2m30s', 'virt-operator-6cbcb47b78-bkvmj 1/1 Running 0 2m30s', 'virt-template-validator-b7bcb65d4-gpv92 0/1 CrashLoopBackOff 4 2m5s', 'virt-template-validator-b7bcb65d4-z2xct 0/1 CrashLoopBackOff 3 2m5s', 'vm-import-controller-6c64995476-vtcfb 1/1 Running 2 2m47s', 'vm-import-operator-6df99df7cd-9chmq 1/1 Running 0 3m11s', '+ kubectl get hco kubevirt-hyperconverged -n kubevirt-hyperconverged \\\\\\'-o=jsonpath={range .status.conditions[*]}{.type}{\"\\\\\\\\t\"}{.status}{\"\\\\\\\\t\"}{.message}{\"\\\\', '\"}{end}\\\\\\'', 'ReconcileComplete\\\\tTrue\\\\tReconcile completed successfully', 'Available\\\\tTrue\\\\tReconcile completed successfully', 'Progressing\\\\tFalse\\\\tReconcile completed successfully', 'Degraded\\\\tFalse\\\\tReconcile completed successfully', 'Upgradeable\\\\tTrue\\\\tReconcile completed successfully', \"++ kubectl get pods -n kubevirt-hyperconverged \\\\'--field-selector=status.phase!=Running\\\\' -o custom-columns=:metadata.name\", 'which: no docker in (/tmp/shared:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin:/go/bin)', 'eval ./hack/build-tests.sh', 'go: github.com/kubevirt/cluster-network-addons-operator@v0.3.1-0.20200527095331-9cc2867ac8dc requires', '\\\\tgithub.com/operator-framework/operator-sdk@v0.12.0 requires', '\\\\tgithub.com/operator-framework/operator-registry@v1.1.1 requires', '\\\\tbitbucket.org/ww/goautoneg@v0.0.0-20120707110453-75cd24fc2f2c: reading https://api.bitbucket.org/2.0/repositories/ww/goautoneg?fields=scm: 404 Not Found', 'make: *** [build-functest] Error 1', '2020/09/26 22:59:52 Container test in pod e2e-aws-cnv failed, exit code 2, reason Error', '2020/09/26 23:14:27 Copied 112.27MB of artifacts from e2e-aws-cnv to /logs/artifacts/e2e-aws', '2020/09/26 23:14:28 Releasing lease for \"aws-quota-slice\"', '2020/09/26 23:14:28 No custom metadata found and prow metadata already exists. 
Not updating the metadata.', '2020/09/26 23:14:28 Ran for 53m50s', 'error: some steps failed:', ' * could not run steps: step e2e-aws failed: template pod \"e2e-aws-cnv\" failed: the pod ci-op-xl3p51qp/e2e-aws-cnv failed after 52m26s (failed containers: test): ContainerFailed one or more containers exited', '', 'Container test exited with code 2, reason Error', '---', 'd6b-h2trd 1/1 Running 0 70s', 'virt-handler-495rk 1/1 Running 0 70s', 'virt-handler-6gptn 1/1 Running 0 70s', 'virt-handler-85wgv 1/1 Running 0 70s', 'virt-operator-6cbcb47b78-9v4tf 1/1 Running 0 2m30s', 'virt-operator-6cbcb47b78-bkvmj 1/1 Running 0 2m30s', 'virt-template-validator-b7bcb65d4-gpv92 0/1 CrashLoopBackOff 4 2m5s', 'virt-template-validator-b7bcb65d4-z2xct 0/1 CrashLoopBackOff 3 2m5s', 'vm-import-controller-6c64995476-vtcfb 1/1 Running 2 2m47s', 'vm-import-operator-6df99df7cd-9chmq 1/1 Running 0 3m11s', '+ kubectl get hco kubevirt-hyperconverged -n kubevirt-hyperconverged \\\\\\'-o=jsonpath={range .status.conditions[*]}{.type}{\"\\\\\\\\t\"}{.status}{\"\\\\\\\\t\"}{.message}{\"\\\\', '\"}{end}\\\\\\'', 'ReconcileComplete\\\\tTrue\\\\tReconcile completed successfully', 'Available\\\\tTrue\\\\tReconcile completed successfully', 'Progressing\\\\tFalse\\\\tReconcile completed successfully', 'Degraded\\\\tFalse\\\\tReconcile completed successfully', 'Upgradeable\\\\tTrue\\\\tReconcile completed successfully', \"++ kubectl get pods -n kubevirt-hyperconverged \\\\'--field-selector=status.phase!=Running\\\\' -o custom-columns=:metadata.name\", 'which: no docker in (/tmp/shared:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin:/go/bin)', 'eval ./hack/build-tests.sh', 'go: github.com/kubevirt/cluster-network-addons-operator@v0.3.1-0.20200527095331-9cc2867ac8dc requires', '\\\\tgithub.com/operator-framework/operator-sdk@v0.12.0 requires', '\\\\tgithub.com/operator-framework/operator-registry@v1.1.1 requires', 
# Scrape every build log of the "e2e-aws-serial-4.1" job.
#
# Result: array_of_logs2[x] is one build-log file and array_of_logs2[x][y] is
# one line of that file (decoded text, split on newlines).

# URL pieces needed to locate the logs
website2 = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com"
base2 = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/release-openshift-ocp-installer-e2e-aws-serial-4.1/"
ending2 = "build-log.txt"

page2 = requests.get(base2)
# Fail loudly on an HTTP error instead of scraping links out of an error page.
page2.raise_for_status()
data2 = page2.text

# Name the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
soup2 = BeautifulSoup(data2, "html.parser")
links2 = [link2.get('href') for link2 in soup2.find_all('a')]
# Drop the first and last anchors of the listing page.
# NOTE(review): presumably navigation links rather than build directories --
# confirm against the listing page.
links2 = links2[1:-1]

# One build-log URL per listed build directory
final_array2 = [str(website2) + str(href) + str(ending2) for href in links2]

array_of_logs2 = []
for log_url in final_array2:
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(log_url) as page2:
        html_bytes2 = page2.read()
    # Decode the payload instead of taking str(bytes): the repr form leaves a
    # leading b' on the first line and backslash-escapes tabs and quotes.
    log_text = html_bytes2.decode('utf-8', errors='replace')
    array_of_logs2.append(log_text.split('\n'))

# first log
print(array_of_logs2[0])
# Analysis on the log data. Trying to find a framework. API (Ningxiao, Parker, Tianze, Hong)
# Identify limitations with data and potential risks of achieving project goals.


# ******* Tianze *******
def ignoreWaiting(logs, marker="Waiting for setup to finish..."):
    """Blank out every log line that contains ``marker``.

    Matching lines are replaced by the empty string rather than removed, so
    the remaining lines keep their original indices.

    Args:
        logs: mutable sequence of log-line strings; it is modified in place.
        marker: substring identifying a line to blank out. Defaults to the
            "Waiting for setup to finish..." progress message. An empty
            marker is contained in every string and therefore blanks every
            line.

    Returns:
        The same ``logs`` object that was passed in.
    """
    for index, line in enumerate(logs):
        if marker in line:
            logs[index] = ""
    return logs
# ******* Tianze *******
# ******* PARKER *******
def is_date(str):
    """Return True when dateutil can parse the argument as a date / timestamp.

    Args:
        str: candidate token. The name shadows the builtin ``str`` inside
            this function only; it is kept so keyword callers keep working.

    Returns:
        True if ``dateutil.parser.parse`` accepts the token, False otherwise.
    """
    try:
        dateutil.parser.parse(str)
    except (ValueError, OverflowError, TypeError):
        # ValueError covers dateutil's ParserError; naming the exceptions
        # (instead of a bare except) keeps Ctrl-C working during the long
        # token loop below.
        return False
    return True


# logs variable contains all parsed logs: logs[x] is the token list of
# build log x with the date / timestamp tokens removed.
#
# array_of_logs2 itself is left untouched, because the drain3 cells read
# array_of_logs2[i] as a list of log lines.
logs = []
for log_lines in array_of_logs2:
    # removing newline characters: the log is stored as a list of lines,
    # so joining with a space flattens it to one string
    flat_log = ' '.join(log_lines)

    # removes leading b' (left over when the log was stored as str(bytes))
    if flat_log.startswith("b'"):
        flat_log = flat_log[2:]

    # splitting each section as its own index (for parsing)
    tokens = flat_log.split(' ')

    # keep only the tokens that are not timestamps
    # NOTE(review): dateutil is lenient, so some bare numbers may also be
    # treated as dates -- confirm this is acceptable for the analysis.
    logs.append([token for token in tokens if not is_date(token)])

# removes empty tokens and keeps root words
stemmer = SnowballStemmer("english")
for log in logs:
    log[:] = [stemmer.stem(x) for x in log if x != '']
\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\\\n'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WtFxnlMOmp3W", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f2995c42-34b9-436d-87e7-6f5eea86c085" + }, + "source": [ + "import drain3\n", + "from drain3 import TemplateMiner\n", + "import json\n", + "import logging\n", + "import sys\n", + "from drain3.kafka_persistence import KafkaPersistence\n", + "\n", + "template_miner = TemplateMiner(None)\n", + "i = 0\n", + "array_of_logs2 = array_of_logs2[:100]\n", + "while True:\n", + " if i >= len(array_of_logs2):\n", + " break\n", + " log_line = ' '.join(array_of_logs2[i])\n", + " i += 1\n", + " if log_line == 'q':\n", + " break\n", + " result = template_miner.add_log_message(log_line)\n", + " result_json = json.dumps(result)\n", + " print(result_json)\n", + "\n", + "print(\"Clusters:\")\n", + "for cluster in template_miner.drain.clusters:\n", + " print(cluster)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "config file not found: drain3.ini\n", + "IOPub data rate exceeded.\n", + "The notebook server will temporarily stop sending output\n", + "to the client in order to avoid crashing it.\n", + "To change this limit, set the config variable\n", + "`--NotebookApp.iopub_data_rate_limit`.\n", + "\n", + "Current values:\n", + "NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n", + "NotebookApp.rate_limit_window=3.0 (secs)\n", + "\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": 
"code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9RPAVaF3LoRb", + "outputId": "a2e91809-f2d1-4783-ca20-28b737a58e7e" + }, + "source": [ + "!pip3 install drain3\n", + "!pip3 install kafka-python\n", + "!pip3 install redis" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting drain3\n", + " Downloading https://files.pythonhosted.org/packages/03/df/cd2118d85b0401cd4b2cc555b97cd5a918ca551a035c77c853087321328e/drain3-0.9.3.tar.gz\n", + "Collecting jsonpickle==1.5.1\n", + " Downloading https://files.pythonhosted.org/packages/77/a7/c2f527ddce3155ae9e008385963c2325cbfd52969f8b38efa2723e2af4af/jsonpickle-1.5.1-py2.py3-none-any.whl\n", + "Requirement already satisfied: cachetools==4.2.1 in /usr/local/lib/python3.7/dist-packages (from drain3) (4.2.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from jsonpickle==1.5.1->drain3) (3.7.2)\n", + "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonpickle==1.5.1->drain3) (3.7.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonpickle==1.5.1->drain3) (3.4.1)\n", + "Building wheels for collected packages: drain3\n", + " Building wheel for drain3 (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for drain3: filename=drain3-0.9.3-cp37-none-any.whl size=16397 sha256=40988bb8854eee8fd03684b245da3c216327103c61103c854093aed2ee220185\n", + " Stored in directory: /root/.cache/pip/wheels/44/eb/c8/0c42c729fa7f47040d8b9bc2e8359a96fee8a4b2bf442fd924\n", + "Successfully built drain3\n", + "Installing collected packages: jsonpickle, drain3\n", + "Successfully installed drain3-0.9.3 jsonpickle-1.5.1\n", + "Collecting kafka-python\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/68/dcb0db055309f680ab2931a3eeb22d865604b638acf8c914bedf4c1a0c8c/kafka_python-2.0.2-py2.py3-none-any.whl (246kB)\n", + "\u001b[K |████████████████████████████████| 256kB 6.8MB/s \n", + "\u001b[?25hInstalling collected packages: kafka-python\n", + "Successfully installed kafka-python-2.0.2\n", + "Collecting redis\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a7/7c/24fb0511df653cf1a5d938d8f5d19802a88cef255706fdda242ff97e91b7/redis-3.5.3-py2.py3-none-any.whl (72kB)\n", + "\u001b[K |████████████████████████████████| 81kB 4.3MB/s \n", + "\u001b[?25hInstalling collected packages: redis\n", + "Successfully installed redis-3.5.3\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DtfC_gaeGv8v" + }, + "source": [ + "The cell below runs drain3 over the scraped build logs. It processes every log line, groups the lines into template clusters, and prints the resulting clusters and prefix tree."
import subprocess
import time

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

template_miner = TemplateMiner()


def _message_of(raw_line):
    """Return the text to mine from one raw log line.

    When the line contains ": ", only the part after its first occurrence is
    returned. Otherwise the whole right-stripped line is returned; taking
    ``partition(": ")[2]`` unconditionally turned every line without that
    separator into the empty string, collapsing them into one empty cluster.
    """
    stripped = raw_line.rstrip()
    _, separator, message = stripped.partition(": ")
    return message if separator else stripped


def _rate(count, seconds):
    """Return ``count / seconds``, or infinity for a zero-length interval."""
    return count / seconds if seconds > 0 else float("inf")


line_count = 0
start_time = time.time()
batch_start_time = start_time
batch_size = 10000
for log_lines in array_of_logs2:
    for raw_line in log_lines:
        line = _message_of(raw_line)
        # Blank lines carry no template; they are counted but not mined.
        result = template_miner.add_log_message(line) if line else None
        line_count += 1
        if line_count % batch_size == 0:
            time_took = time.time() - batch_start_time
            rate = _rate(batch_size, time_took)
            logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                        f"{len(template_miner.drain.clusters)} clusters so far.")
            batch_start_time = time.time()
        # Only report lines that created or changed a cluster.
        if result is not None and result["change_type"] != "none":
            result_json = json.dumps(result)
            logger.info(f"Input ({line_count}): " + line)
            logger.info("Result: " + result_json)

time_took = time.time() - start_time
rate = _rate(line_count, time_took)
logger.info(f"--- Done processing file. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
            f"{len(template_miner.drain.clusters)} clusters")
# Largest clusters first
sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
for cluster in sorted_clusters:
    logger.info(cluster)

print("Prefix Tree:")
template_miner.drain.print_tree()

template_miner.profiler.report(0)
+ "Processing line: 170000, rate 141831.2 lines/sec, 1 clusters so far.\n", + "Processing line: 180000, rate 135012.7 lines/sec, 1 clusters so far.\n", + "Processing line: 190000, rate 134598.9 lines/sec, 1 clusters so far.\n", + "Processing line: 200000, rate 139246.6 lines/sec, 1 clusters so far.\n", + "Processing line: 210000, rate 143068.3 lines/sec, 1 clusters so far.\n", + "Processing line: 220000, rate 136838.9 lines/sec, 1 clusters so far.\n", + "Processing line: 230000, rate 128096.9 lines/sec, 1 clusters so far.\n", + "Processing line: 240000, rate 130335.2 lines/sec, 1 clusters so far.\n", + "Processing line: 250000, rate 136070.5 lines/sec, 1 clusters so far.\n", + "Processing line: 260000, rate 137761.2 lines/sec, 1 clusters so far.\n", + "Processing line: 270000, rate 149725.5 lines/sec, 1 clusters so far.\n", + "Processing line: 280000, rate 135413.3 lines/sec, 1 clusters so far.\n", + "Processing line: 290000, rate 145574.4 lines/sec, 1 clusters so far.\n", + "Processing line: 300000, rate 136784.0 lines/sec, 1 clusters so far.\n", + "Processing line: 310000, rate 135356.5 lines/sec, 1 clusters so far.\n", + "Processing line: 320000, rate 126879.8 lines/sec, 1 clusters so far.\n", + "Processing line: 330000, rate 133886.1 lines/sec, 1 clusters so far.\n", + "Processing line: 340000, rate 135834.7 lines/sec, 1 clusters so far.\n", + "Processing line: 350000, rate 135627.4 lines/sec, 1 clusters so far.\n", + "Processing line: 360000, rate 140495.8 lines/sec, 1 clusters so far.\n", + "Processing line: 370000, rate 141877.3 lines/sec, 1 clusters so far.\n", + "Processing line: 380000, rate 140146.5 lines/sec, 1 clusters so far.\n", + "Processing line: 390000, rate 134580.3 lines/sec, 1 clusters so far.\n", + "Processing line: 400000, rate 137391.1 lines/sec, 1 clusters so far.\n", + "Processing line: 410000, rate 128744.9 lines/sec, 1 clusters so far.\n", + "Processing line: 420000, rate 130143.1 lines/sec, 1 clusters so far.\n", + "Processing line: 
430000, rate 141732.0 lines/sec, 1 clusters so far.\n", + "Processing line: 440000, rate 136977.4 lines/sec, 1 clusters so far.\n", + "Processing line: 450000, rate 139593.8 lines/sec, 1 clusters so far.\n", + "Processing line: 460000, rate 144790.4 lines/sec, 1 clusters so far.\n", + "Processing line: 470000, rate 137773.4 lines/sec, 1 clusters so far.\n", + "Processing line: 480000, rate 134854.2 lines/sec, 1 clusters so far.\n", + "Processing line: 490000, rate 131235.6 lines/sec, 1 clusters so far.\n", + "Processing line: 500000, rate 130277.3 lines/sec, 1 clusters so far.\n", + "Processing line: 510000, rate 137528.5 lines/sec, 1 clusters so far.\n", + "Processing line: 520000, rate 147393.0 lines/sec, 1 clusters so far.\n", + "Processing line: 530000, rate 130455.2 lines/sec, 1 clusters so far.\n", + "Processing line: 540000, rate 137564.2 lines/sec, 1 clusters so far.\n", + "Processing line: 550000, rate 145514.8 lines/sec, 1 clusters so far.\n", + "Processing line: 560000, rate 138612.5 lines/sec, 1 clusters so far.\n", + "Processing line: 570000, rate 139010.4 lines/sec, 1 clusters so far.\n", + "Processing line: 580000, rate 136523.6 lines/sec, 1 clusters so far.\n", + "Processing line: 590000, rate 121904.8 lines/sec, 1 clusters so far.\n", + "Processing line: 600000, rate 133738.0 lines/sec, 1 clusters so far.\n", + "Processing line: 610000, rate 137351.1 lines/sec, 1 clusters so far.\n", + "Processing line: 620000, rate 139653.3 lines/sec, 1 clusters so far.\n", + "Processing line: 630000, rate 126251.6 lines/sec, 1 clusters so far.\n", + "Processing line: 640000, rate 143255.9 lines/sec, 1 clusters so far.\n", + "Processing line: 650000, rate 147238.8 lines/sec, 1 clusters so far.\n", + "Processing line: 660000, rate 126915.9 lines/sec, 1 clusters so far.\n", + "Processing line: 670000, rate 135129.7 lines/sec, 1 clusters so far.\n", + "Processing line: 680000, rate 129566.6 lines/sec, 1 clusters so far.\n", + "Processing line: 690000, rate 135591.9 
lines/sec, 1 clusters so far.\n", + "Processing line: 700000, rate 147134.0 lines/sec, 1 clusters so far.\n", + "Processing line: 710000, rate 136911.7 lines/sec, 1 clusters so far.\n", + "Processing line: 720000, rate 133970.4 lines/sec, 1 clusters so far.\n", + "Processing line: 730000, rate 135731.4 lines/sec, 1 clusters so far.\n", + "Processing line: 740000, rate 147852.3 lines/sec, 1 clusters so far.\n", + "Processing line: 750000, rate 142273.8 lines/sec, 1 clusters so far.\n", + "Processing line: 760000, rate 138137.3 lines/sec, 1 clusters so far.\n", + "Processing line: 770000, rate 131218.8 lines/sec, 1 clusters so far.\n", + "Processing line: 780000, rate 111157.4 lines/sec, 1 clusters so far.\n", + "Processing line: 790000, rate 150556.3 lines/sec, 1 clusters so far.\n", + "Processing line: 800000, rate 127328.9 lines/sec, 1 clusters so far.\n", + "Processing line: 810000, rate 139929.1 lines/sec, 1 clusters so far.\n", + "Processing line: 820000, rate 137494.7 lines/sec, 1 clusters so far.\n", + "Processing line: 830000, rate 147884.1 lines/sec, 1 clusters so far.\n", + "Processing line: 840000, rate 144005.0 lines/sec, 1 clusters so far.\n", + "Processing line: 850000, rate 128560.3 lines/sec, 1 clusters so far.\n", + "Processing line: 860000, rate 131156.4 lines/sec, 1 clusters so far.\n", + "Processing line: 870000, rate 139360.9 lines/sec, 1 clusters so far.\n", + "Processing line: 880000, rate 138659.7 lines/sec, 1 clusters so far.\n", + "Processing line: 890000, rate 145846.2 lines/sec, 1 clusters so far.\n", + "Processing line: 900000, rate 139418.8 lines/sec, 1 clusters so far.\n", + "Processing line: 910000, rate 138572.2 lines/sec, 1 clusters so far.\n", + "Processing line: 920000, rate 136414.8 lines/sec, 1 clusters so far.\n", + "Processing line: 930000, rate 138172.3 lines/sec, 1 clusters so far.\n", + "Processing line: 940000, rate 117203.5 lines/sec, 1 clusters so far.\n", + "Processing line: 950000, rate 139526.9 lines/sec, 1 clusters 
so far.\n", + "Processing line: 960000, rate 138495.4 lines/sec, 1 clusters so far.\n", + "Processing line: 970000, rate 140319.5 lines/sec, 1 clusters so far.\n", + "Processing line: 980000, rate 142715.8 lines/sec, 1 clusters so far.\n", + "Processing line: 990000, rate 143049.2 lines/sec, 1 clusters so far.\n", + "Processing line: 1000000, rate 155242.2 lines/sec, 1 clusters so far.\n", + "Processing line: 1010000, rate 130838.1 lines/sec, 1 clusters so far.\n", + "Processing line: 1020000, rate 132089.1 lines/sec, 1 clusters so far.\n", + "Processing line: 1030000, rate 141162.5 lines/sec, 1 clusters so far.\n", + "Processing line: 1040000, rate 142479.2 lines/sec, 1 clusters so far.\n", + "Processing line: 1050000, rate 150973.1 lines/sec, 1 clusters so far.\n", + "Processing line: 1060000, rate 142660.0 lines/sec, 1 clusters so far.\n", + "Processing line: 1070000, rate 144624.2 lines/sec, 1 clusters so far.\n", + "Processing line: 1080000, rate 132861.9 lines/sec, 1 clusters so far.\n", + "Processing line: 1090000, rate 142139.8 lines/sec, 1 clusters so far.\n", + "Input (1098448): us-east-1 (zones: us-east-1b us-east-1c)\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 2, \"cluster_size\": 1, \"template_mined\": \"us-east-1 (zones: us-east-1b us-east-1c)\", \"cluster_count\": 2}\n", + "Input (1098457): Error applying plan:\"\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 3, \"cluster_size\": 1, \"template_mined\": \"Error applying plan:\\\"\", \"cluster_count\": 3}\n", + "Input (1098460): 1 error occurred:\"\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 4, \"cluster_size\": 1, \"template_mined\": \"1 error occurred:\\\"\", \"cluster_count\": 4}\n", + "Input (1098461): [ERR]: Error building changeset: timeout while waiting for state to become \\'accepted\\' (timeout: 5m0s)\"\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 5, \"cluster_size\": 1, \"template_mined\": 
\"[ERR]: Error building changeset: timeout while waiting for state to become \\\\'accepted\\\\' (timeout: 5m0s)\\\"\", \"cluster_count\": 5}\n", + "Input (1098473): failed to generate asset \\\\\"Cluster\\\\\": failed to create cluster: failed to apply using Terraform\"\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 6, \"cluster_size\": 1, \"template_mined\": \"failed to generate asset \\\\\\\\\\\"Cluster\\\\\\\\\\\": failed to create cluster: failed to apply using Terraform\\\"\", \"cluster_count\": 6}\n", + "Input (1098480): some steps failed:\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 7, \"cluster_size\": 1, \"template_mined\": \"some steps failed:\", \"cluster_count\": 7}\n", + "Input (1098481): step e2e-aws-serial failed: template pod \"e2e-aws-serial\" failed: the pod ci-op-6jxq6mqt/e2e-aws-serial failed after 18m49s (failed containers: setup): ContainerFailed one or more containers exited\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 8, \"cluster_size\": 1, \"template_mined\": \"step e2e-aws-serial failed: template pod \\\"e2e-aws-serial\\\" failed: the pod ci-op-6jxq6mqt/e2e-aws-serial failed after 18m49s (failed containers: setup): ContainerFailed one or more containers exited\", \"cluster_count\": 8}\n", + "Input (1098600): [us-west-1--26]\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 9, \"cluster_size\": 1, \"template_mined\": \"[us-west-1--26]\", \"cluster_count\": 9}\n", + "Input (1098606): Failed to patch the ci-op-n2904x8n namespace to update the ci.openshift.io/active annotation: namespaces \"ci-op-n2904x8n\" is forbidden: User \"system:serviceaccount:ci:ci-operator\" cannot patch namespaces in the namespace \"ci-op-n2904x8n\": no RBAC policy matched\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 10, \"cluster_size\": 1, \"template_mined\": \"Failed to patch the ci-op-n2904x8n namespace to update the ci.openshift.io/active 
annotation: namespaces \\\"ci-op-n2904x8n\\\" is forbidden: User \\\"system:serviceaccount:ci:ci-operator\\\" cannot patch namespaces in the namespace \\\"ci-op-n2904x8n\\\": no RBAC policy matched\", \"cluster_count\": 10}\n", + "Input (1098613): us-west-1--26\n", + "Result: {\"change_type\": \"cluster_created\", \"cluster_id\": 11, \"cluster_size\": 1, \"template_mined\": \"us-west-1--26\", \"cluster_count\": 11}\n", + "--- Done processing file. Total of 1098616 lines, rate 133104.5 lines/sec, 11 clusters\n", + "ID=1 : size=1098601 : \n", + "ID=2 : size=2 : us-east-1 (zones: us-east-1b us-east-1c)\n", + "ID=3 : size=2 : Error applying plan:\"\n", + "ID=4 : size=2 : 1 error occurred:\"\n", + "ID=5 : size=2 : [ERR]: Error building changeset: timeout while waiting for state to become \\'accepted\\' (timeout: 5m0s)\"\n", + "ID=6 : size=2 : failed to generate asset \\\\\"Cluster\\\\\": failed to create cluster: failed to apply using Terraform\"\n", + "ID=7 : size=1 : some steps failed:\n", + "ID=8 : size=1 : step e2e-aws-serial failed: template pod \"e2e-aws-serial\" failed: the pod ci-op-6jxq6mqt/e2e-aws-serial failed after 18m49s (failed containers: setup): ContainerFailed one or more containers exited\n", + "ID=9 : size=1 : [us-west-1--26]\n", + "ID=10 : size=1 : Failed to patch the ci-op-n2904x8n namespace to update the ci.openshift.io/active annotation: namespaces \"ci-op-n2904x8n\" is forbidden: User \"system:serviceaccount:ci:ci-operator\" cannot patch namespaces in the namespace \"ci-op-n2904x8n\": no RBAC policy matched\n", + "ID=11 : size=1 : us-west-1--26\n", + "Prefix Tree:\n", + "\n", + "\t<0>\n", + "\t<4>\n", + "\t\t<*>\n", + "\t<3>\n", + "\t\tError\n", + "\t\t<*>\n", + "\t\tsome\n", + "\t<14>\n", + "\t\t[ERR]:\n", + "\t\tfailed\n", + "\t<22>\n", + "\t\tstep\n", + "\t<1>\n", + "\t<28>\n", + "\t\tFailed\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P2QaVqCj4NIF" + }, + "source": [ + "This cell removes 
dates and timestamps, whitespace and keeps the stems of words\n", + "\n", + "> Indented block\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yGDZGWSf32JQ" + }, + "source": [ + "# ******* PARKER *******\n", + "failures = []\n", + "successes = []\n", + "# word order for matrix: 0: fail, 1: error, 2: success, 3: run, 4: crashloopbackoff\n", + "word_matrix = []\n", + "for log in logs:\n", + " tmp = ' '.join(log)\n", + " tmp = TextBlob(tmp)\n", + " fail = tmp.word_counts['fail']\n", + " error = tmp.word_counts['error']\n", + " success = tmp.word_counts['success']\n", + " run = tmp.word_counts['run']\n", + " crash = tmp.word_counts['crashloopbackoff']\n", + " word_matrix.append([fail, error, success, run, crash])\n", + "\n", + "# remove noise\n", + "word_matrix = word_matrix[2:]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kHllUg9vwX_H" + }, + "source": [ + "This cell creates a matrix of keywords. each row is a single log entry and each column is a specific keyword" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xrj2maH9wVJl", + "outputId": "d2d03a5a-9cc5-44c6-b0f7-ee5bb0dd25e1" + }, + "source": [ + "# ******* PARKER *******\n", + "import math\n", + "# Latent Semantic Analysis approach\n", + "\n", + "num_of_docs = [0 for i in range(len(word_matrix[0]))]\n", + "for i in range(len(word_matrix)):\n", + " for j in range(len(word_matrix[i])):\n", + " if word_matrix[i][j] != 0:\n", + " num_of_docs[j] += 1\n", + "for i in range(len(word_matrix)):\n", + " for j in range(len(word_matrix[i])):\n", + " tf = word_matrix[i][j]\n", + " idf = math.log(len(word_matrix) / num_of_docs[j])\n", + " word_matrix[i][j] = tf * idf\n", + "\n", + "# TODO: try to implement anomaly detection with word matrix" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0.023530497410194036, 0.0, 
0.05848019881595618, 0.0, 0.0], [0.023530497410194036, 0.0, 0.05848019881595618, 0.0, 0.0], [0.023530497410194036, 0.0, 0.05848019881595618, 0.0, 0.0], [0.023530497410194036, 0.0, 0.05848019881595618, 0.0, 0.0], [0.023530497410194036, 0.0, 0.05848019881595618, 0.0, 0.0], [0.21177447669174632, 0.41928699106748313, 0.023392079526382472, 0.0, 0.0], [0.07059149223058211, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.07059149223058211, 0.5390832742296211, 0.2924009940797809, 0.0, 0.0], [0.07059149223058211, 0.4791851326485521, 0.30409703384297215, 0.0, 0.39390428570708846], [0.07059149223058211, 0.4791851326485521, 0.30409703384297215, 0.0, 0.39390428570708846], [0.07059149223058211, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.0, 0.059898141581069014, 0.0, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.5390832742296211, 0.3157930736061634, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.07059149223058211, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.04706099482038807, 0.5390832742296211, 0.3157930736061634, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.41928699106748313, 0.2924009940797809, 0.0, 0.7878085714141769], [0.04706099482038807, 0.41928699106748313, 0.3157930736061634, 0.0, 0.7878085714141769], [0.04706099482038807, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.4791851326485521, 0.37427327242211955, 0.0, 0.39390428570708846], [0.04706099482038807, 0.6588795573917592, 0.4093613917116933, 0.0, 0.0], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 
0.39390428570708846], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.41928699106748313, 0.3157930736061634, 0.0, 0.7878085714141769], [0.04706099482038807, 0.5989814158106901, 0.4210574314748845, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.5390832742296211, 0.3508811928957371, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.0, 0.4791851326485521, 0.4795376302908407, 0.0, 0.0], [0.04706099482038807, 0.5390832742296211, 0.05848019881595618, 0.0, 0.0], [0.8470979067669853, 1.8568423890131394, 0.198832675974251, 0.0, 2.363425714242531], [0.04706099482038807, 0.29949070790534504, 0.3157930736061634, 0.0, 1.5756171428283539], [3.0119036685048366, 6.289304866012246, 2.818745582929088, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.41928699106748313, 0.2924009940797809, 0.0, 0.7878085714141769], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [1.270646860150478, 4.552258760161245, 0.36257723265892833, 0.0, 0.39390428570708846], [0.04706099482038807, 0.5989814158106901, 0.37427327242211955, 0.0, 0.0], [2.729537699582508, 2.7553145127291745, 2.6666970660076017, 0.0, 0.39390428570708846], [0.04706099482038807, 
0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.5390832742296211, 0.37427327242211955, 0.0, 0.7878085714141769], [0.04706099482038807, 0.41928699106748313, 0.30409703384297215, 0.0, 0.7878085714141769], [0.04706099482038807, 0.41928699106748313, 0.30409703384297215, 0.0, 0.7878085714141769], [0.04706099482038807, 0.5390832742296211, 0.035088119289573706, 0.0, 0.0], [0.04706099482038807, 0.5989814158106901, 0.36257723265892833, 0.0, 0.0], [0.04706099482038807, 0.41928699106748313, 0.2924009940797809, 0.0, 0.7878085714141769], [0.21177447669174632, 0.41928699106748313, 0.023392079526382472, 0.0, 0.0], [0.04706099482038807, 0.41928699106748313, 0.3157930736061634, 0.0, 0.7878085714141769], [0.04706099482038807, 0.5989814158106901, 0.36257723265892833, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.04706099482038807, 0.5390832742296211, 0.3157930736061634, 0.0, 0.0], [0.04706099482038807, 0.5390832742296211, 0.035088119289573706, 0.0, 0.0], [0.04706099482038807, 0.41928699106748313, 0.2924009940797809, 0.0, 0.7878085714141769], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.5390832742296211, 0.3157930736061634, 0.0, 0.0], [0.04706099482038807, 0.7187776989728282, 0.4093613917116933, 0.0, 0.0], [0.04706099482038807, 0.5390832742296211, 0.3157930736061634, 0.0, 0.7878085714141769], [1.5059518342524183, 6.588795573917592, 0.3274891133693546, 0.0, 0.39390428570708846], [0.14118298446116423, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [1.5765433264830004, 3.953277344350555, 0.28070495431658965, 0.0, 1.5756171428283539], [0.07059149223058211, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], 
[0.04706099482038807, 0.6588795573917592, 0.035088119289573706, 0.0, 0.0], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.41928699106748313, 0.035088119289573706, 0.0, 0.0], [0.04706099482038807, 0.5989814158106901, 0.36257723265892833, 0.0, 0.0], [0.04706099482038807, 0.41928699106748313, 0.2924009940797809, 0.0, 0.7878085714141769], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.5989814158106901, 0.36257723265892833, 0.0, 0.0], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.5390832742296211, 0.2924009940797809, 0.0, 0.0], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846], [0.04706099482038807, 0.29949070790534504, 0.2924009940797809, 0.0, 1.5756171428283539], [0.04706099482038807, 0.29949070790534504, 0.1286564373951036, 0.0, 2.363425714242531], [0.04706099482038807, 0.4791851326485521, 0.3157930736061634, 0.0, 0.39390428570708846]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0V_DFewg4cCn" + }, + "source": [ + "This cell is determining the number of keywords in each log and plotting them.\n", + "TODO: look at logs and find more keywords, instead of failures and successes make each word an entry in a matrix and use SVD to plot similarities" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 + }, + "id": "Did5do5Ga_TQ", + "outputId": "3c5cafb4-2d0e-41e7-8157-3d06f19c9cc0" + }, + "source": [ + "# ******* NINGXIAO *******\n", + "# find events in log, display frequecy of each event in a bar chart\n", + "import spacy\n", + "s = \"\"\n", + "for log in logs:\n", + " temp = \"\"\n", + " temp = temp.join(log)\n", + " s += temp\n", + "# Error Message: Text of length 5496377 exceeds maximum of 1000000.\n", + "s = 
s[0:100000]\n", + "nlp = spacy.load(\"en_core_web_sm\")\n", + "doc = nlp(s)\n", + "dictionary = {};\n", + "for token in doc:\n", + " if token.pos_ == 'VERB':\n", + " if token.text not in dictionary:\n", + " dictionary[token.text] = 1\n", + " else:\n", + " dictionary[token.text] += 1\n", + " #print(token.text, toekn.pos_)\n", + "\n", + "#print(dictionary)\n", + "\n", + "keys = list(dictionary.keys())\n", + "values = list(dictionary.values())\n", + "\n", + "fig, ax = plt.subplots(figsize=(9.2, 7)) \n", + "ax.barh(keys,values)\n", + "plt.title('Verbs in logs')\n", + "plt.ylabel('Verbs')\n", + "plt.xlabel('Frequency')\n", + "\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R1oosA55NoQZ" + }, + "source": [ + "The image above shows the verbs and their frequency appear in the tokenized log text. It indicates what event and how often it happens in the log." + ] + } + ] +} \ No newline at end of file diff --git a/RedHatNLP/Deliverable4.ipynb b/RedHatNLP/Deliverable4.ipynb new file mode 100644 index 00000000..1b49f0a9 --- /dev/null +++ b/RedHatNLP/Deliverable4.ipynb @@ -0,0 +1,1389 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ocp-ci-analysis/RedHatNLP.ipynb at master · parkerwstone/ocp-ci-analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+ + + +
+ + + + + + + + + +
+
+
+ + + + + + + + + + +
+ +
+ +
+

+ + + / + + ocp-ci-analysis + + +

+ + + forked from aicoe-aiops/ocp-ci-analysis + + +
+ +
    + +
  • + +
    + + + + + + + + Watch + + + + + +
    +
    +

    Notifications

    + +
    + +
    +
    + + + + + + + + +
    + +
    +
    +
    + + +
    +
    + +
    + + + +
  • + +
  • +
    +
    + + +
    +
    + + +
    + +
  • + +
  • +
    +
    + +
  • +
+ +
+ + + + +
+ + +
+
+ + + + +
+ + + + Permalink + + + +
+ +
+
+ + + master + + + +
+
+
+ Switch branches/tags + +
+ + + +
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + +
+ + +
+
+
+
+ +
+ +
+ + + + Go to file + + +
+ + +
+
+
+ + + +
+ +
+
+
 
+
+ +
+
 
+ Cannot retrieve contributors at this time +
+
+ + + + + + + + + + + + +
+ + +
+
+ + + 3.43 MB +
+ +
+ +
+ Download +
+ +
+ + + + +
+ +
+
+
+ + + +
+ +
+
+ + + + +
Sorry, something went wrong. Reload?
+
Sorry, we cannot display this file.
+
Sorry, this file is invalid so it cannot be displayed.
+ +
+
+ +
+ + +
+ + + +
+ + +
+ + +
+
+ + +
+ + + +
+
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + diff --git a/RedHatNLP/Final_Analysis.ipynb b/RedHatNLP/Final_Analysis.ipynb new file mode 100644 index 00000000..17f348f5 --- /dev/null +++ b/RedHatNLP/Final_Analysis.ipynb @@ -0,0 +1,1348 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Final_Analysis", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "P-SchSDwNCgQ" + }, + "source": [ + "

Red Hat NLP Spark Project

\n", + "\n", + "This notebook uses Red Hat's OpenShift data logs and processes them using Drain3. \n", + "\n", + "The first thing this notebook does is webscrape the OpenShift logs and installs dependencies. After, the logs are then cleaned by removing leading tags, dates, timestamps and other unique identifiers. This is done manually because Drain3 does not do this consistently for all logs. Finally the parsed logs are then fed into Drain3 and then put into a dictionary sorted by cluster ID.\n", + "\n", + "The final step that needs to be done is to sift out the pass and fail logs based on their cluster ID and visualize our data." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "z5cWx89XM9t6", + "outputId": "0e6ad8e0-73a2-4d65-dd8d-26c2b6912713" + }, + "source": [ + "!pip3 install drain3\n", + "!pip3 install kafka-python\n", + "!pip3 install redis\n", + "!pip install -q tf-models-official" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting drain3\n", + " Downloading https://files.pythonhosted.org/packages/7e/1f/09c4ee7d648b66dda7dc8426e9cf9faeee71e544a2e7700d3d959e5cca59/drain3-0.9.5.tar.gz\n", + "Collecting jsonpickle==1.5.1\n", + " Downloading https://files.pythonhosted.org/packages/77/a7/c2f527ddce3155ae9e008385963c2325cbfd52969f8b38efa2723e2af4af/jsonpickle-1.5.1-py2.py3-none-any.whl\n", + "Requirement already satisfied: cachetools==4.2.1 in /usr/local/lib/python3.7/dist-packages (from drain3) (4.2.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from jsonpickle==1.5.1->drain3) (3.10.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonpickle==1.5.1->drain3) (3.4.1)\n", + "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in 
/usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonpickle==1.5.1->drain3) (3.7.4.3)\n", + "Building wheels for collected packages: drain3\n", + " Building wheel for drain3 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for drain3: filename=drain3-0.9.5-cp37-none-any.whl size=18179 sha256=df021c63200c550dc0982b1034ed467c35c5686e7d82a2590d27a205bb6ec15f\n", + " Stored in directory: /root/.cache/pip/wheels/11/a1/08/125223534f199f0db7a435b437015f83abba341ef0f4f6c64f\n", + "Successfully built drain3\n", + "Installing collected packages: jsonpickle, drain3\n", + "Successfully installed drain3-0.9.5 jsonpickle-1.5.1\n", + "Collecting kafka-python\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/68/dcb0db055309f680ab2931a3eeb22d865604b638acf8c914bedf4c1a0c8c/kafka_python-2.0.2-py2.py3-none-any.whl (246kB)\n", + "\u001b[K |████████████████████████████████| 256kB 5.8MB/s \n", + "\u001b[?25hInstalling collected packages: kafka-python\n", + "Successfully installed kafka-python-2.0.2\n", + "Collecting redis\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a7/7c/24fb0511df653cf1a5d938d8f5d19802a88cef255706fdda242ff97e91b7/redis-3.5.3-py2.py3-none-any.whl (72kB)\n", + "\u001b[K |████████████████████████████████| 81kB 3.2MB/s \n", + "\u001b[?25hInstalling collected packages: redis\n", + "Successfully installed redis-3.5.3\n", + "\u001b[K |████████████████████████████████| 1.1MB 5.7MB/s \n", + "\u001b[K |████████████████████████████████| 174kB 13.4MB/s \n", + "\u001b[K |████████████████████████████████| 1.2MB 18.3MB/s \n", + "\u001b[K |████████████████████████████████| 51kB 5.1MB/s \n", + "\u001b[K |████████████████████████████████| 358kB 28.6MB/s \n", + "\u001b[K |████████████████████████████████| 706kB 28.8MB/s \n", + "\u001b[K |████████████████████████████████| 102kB 8.8MB/s \n", + "\u001b[K |████████████████████████████████| 645kB 39.7MB/s \n", + "\u001b[K 
|████████████████████████████████| 37.6MB 1.3MB/s \n", + "\u001b[?25h Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for py-cpuinfo (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Bq5k5nqiC02i" + }, + "source": [ + "from urllib.request import urlopen\n", + "from bs4 import BeautifulSoup\n", + "from google.colab import files\n", + "import pandas as pd\n", + "import io\n", + "import numpy as np\n", + "import dateutil\n", + "from dateutil import parser\n", + "import textblob\n", + "import requests\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "import pickle\n", + "import tensorflow as tf\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import StratifiedShuffleSplit\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import precision_score\n", + "from sklearn.metrics import recall_score\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "import drain3\n", + "from drain3 import TemplateMiner\n", + "import json\n", + "import logging\n", + "import sys\n", + "from drain3.kafka_persistence import KafkaPersistence\n", + "import re\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import StratifiedShuffleSplit\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "from 
sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import RepeatedStratifiedKFold\n", + "import subprocess\n", + "import time\n", + "import spacy\n", + "\n", + "from xgboost import XGBClassifier\n", + "import os, os.path\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iq11lna0RAee" + }, + "source": [ + "#

Dataset

\n", + "\n", + "Gather log files from OpenShift and save them to shared google drive, Red Hat & BU." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "st2dPqa-UuWF", + "outputId": "53eb705f-35a1-4a20-8f58-1ff0884534ac" + }, + "source": [ + "\n", + "#enable to read files from the google drive\n", + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3plLsNUbQdBo", + "outputId": "3182b287-4da0-4a2d-e2ca-32062109c696" + }, + "source": [ + "with open ('/content/drive/MyDrive/RedHat_BU/log/log_file_1.ob', 'rb') as fp:\n", + " logs1 = pickle.load(fp)\n", + "#print(logs[1])\n", + "\n", + "with open ('/content/drive/MyDrive/RedHat_BU/label/label_file_1.ob', 'rb') as fp:\n", + " labels1 = pickle.load(fp)\n", + "#print(labels)\n", + "\n", + "print(\"number of logs: \", len(logs1))\n", + "print(\"number of labels: \",len(labels1))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "number of logs: 416\n", + "number of labels: 416\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fj1i40jxS0MA", + "outputId": "a1c430d0-ea64-4b23-d857-58805df722b2" + }, + "source": [ + "#increase data size by twice (416+452=868)\n", + "with open ('/content/drive/MyDrive/RedHat_BU/log/log_file_2.ob', 'rb') as fp:\n", + " logs2 = pickle.load(fp)\n", + "#print(logs[1])\n", + "\n", + "with open ('/content/drive/MyDrive/RedHat_BU/label/label_file_2.ob', 'rb') as fp:\n", + " labels2 = pickle.load(fp)\n", + "#print(labels)\n", + "print(\"number of logs: \", len(logs2))\n", + "print(\"number of labels: 
\",len(labels2))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "number of logs: 452\n", + "number of labels: 452\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dhXs4CqLUs9n", + "outputId": "5228254b-00a2-44f0-bf2f-bddf283ad303" + }, + "source": [ + "#increase data size by 3 times (868+2958=3826)\n", + "with open ('/content/drive/MyDrive/RedHat_BU/log/log_file_3.ob', 'rb') as fp:\n", + " logs3 = pickle.load(fp)\n", + "#print(logs[1])\n", + "\n", + "with open ('/content/drive/MyDrive/RedHat_BU/label/label_file_3.ob', 'rb') as fp:\n", + " labels3 = pickle.load(fp)\n", + "#print(labels)\n", + "print(\"number of logs: \", len(logs3))\n", + "print(\"number of labels: \",len(labels3))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "number of logs: 2958\n", + "number of labels: 2958\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GvYhITN47N4P", + "outputId": "a289ff5c-34e5-41c5-802f-c43730f494f2" + }, + "source": [ + "#increase data size 3826+168=3994\n", + "with open ('/content/drive/MyDrive/RedHat_BU/log/log_file_4.ob', 'rb') as fp:\n", + " logs4 = pickle.load(fp)\n", + "#print(logs[1])\n", + "\n", + "with open ('/content/drive/MyDrive/RedHat_BU/label/label_file_4.ob', 'rb') as fp:\n", + " labels4 = pickle.load(fp)\n", + "#print(labels)\n", + "print(\"number of logs: \", len(logs4))\n", + "print(\"number of labels: \",len(labels4))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "number of logs: 168\n", + "number of labels: 168\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "18ETd8BvS3Ne", + "outputId": 
"472993c0-04ae-4462-b694-6ed768a32547" + }, + "source": [ + "logs = logs1# + logs2 + logs3 + logs4 \n", + "labels = labels1# + labels2 + labels3 + labels4 \n", + "print(\"number of logs: \", len(logs))\n", + "print(\"number of labels: \",len(labels))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "number of logs: 416\n", + "number of labels: 416\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "heDQxXYAQzcU" + }, + "source": [ + "#

Assigning Labels to Logs

" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S056NuDx71Wt", + "outputId": "355a0485-8e06-49f3-c0fb-215b0e30d8fc" + }, + "source": [ + "# 1 := success, 0 := fail\n", + "y = []\n", + "count_0 = 0\n", + "count_1 = 0\n", + "for label in labels:\n", + " if label == 'SUCCESS':\n", + " y.append(1)\n", + " count_1 = count_1 + 1\n", + " else:\n", + " y.append(0)\n", + " count_0 = count_0 + 1\n", + "print(\"y = \", y)\n", + "print(\"number of success/1 = \", count_1)\n", + "print(\"number of fail/0 = \", count_0)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n", + "number of success/1 = 276\n", + "number of fail/0 = 140\n" + 
], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pb5gAOrHQ67U" + }, + "source": [ + "

Splitting log lines for vectorization

" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BuzkkC0dOamf", + "outputId": "ff7da8e0-bc17-425c-8691-0e7588906346" + }, + "source": [ + "vocab = {}\n", + "i = 0\n", + "for log in logs:\n", + " log = str(log).split(\",\")\n", + " for line in log:\n", + " line = line.replace(\"'\",\"\").replace(\" \",\"\").replace(\"\\\"\",\"\")\n", + " if line not in vocab:\n", + " vocab[line] = i\n", + " i+=1\n", + "print(len(vocab))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "39584\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "d__aAxaiXGmG" + }, + "source": [ + "#print(list(vocab.items())[:100])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aTYgLuEaRHiX" + }, + "source": [ + "#

Classifying logs without Drain3 and parsing

\n", + "\n", + "Here begins the process of vectorizing each log line and then classifying it as a pass or fail log accordingly. If this cell has an error, run the cell that removes newline characters and the leading b' tag from the logs." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "sUhrDxlNO7JS" + }, + "source": [ + "vectorizer = TfidfVectorizer(vocabulary = vocab)\n", + "\n", + "X = vectorizer.fit_transform(logs)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ukvCx6NDUhU5", + "outputId": "ae14ab29-26f9-4463-bbc0-0aa3e04f047c" + }, + "source": [ + "X.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(416, 39584)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TPerlihUQUkT" + }, + "source": [ + "sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, train_size= 0.8, random_state=1)\n", + "sss.get_n_splits(X, y)\n", + "y = np.array(y)\n", + "for train_index, test_index in sss.split(X, y):\n", + " X_train, X_test = X[train_index], X[test_index]\n", + " y_train, y_test = y[train_index], y[test_index]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JjhiyTqNr9Gs", + "outputId": "86dfda38-01f1-40de-9330-cdae8b583274" + }, + "source": [ + "#tfidf\n", + "X_train.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(332, 39584)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 39 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BJcVgQrOVeI-" + }, + "source": [ + "# Classifying logs without Drain3 with XGB\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + 
"metadata": { + "id": "SMY0yIo1zt4d" + }, + "source": [ + "#training accuracy calculator\n", + "def accuracy_cal(prediction, Testy):\n", + " count = 0\n", + " for i in range(len(prediction)):\n", + " if prediction[i] == int(Testy[i]):\n", + " count+=1\n", + " accuracy = round(count / len(prediction) * 100, 4)\n", + " #print(accuracy, \"%\")\n", + " return accuracy\n", + "\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WAm7oirNUc9P", + "outputId": "b30b7878-e930-47f8-dbf3-d1f849bcc3a0" + }, + "source": [ + "model_XGB = XGBClassifier(scale_pos_weight=99).fit(X_train, y_train)\n", + "y_test_predictions = model_XGB.predict(X_test)\n", + "accuracy = accuracy_cal(y_test_predictions, y_test)\n", + "\n", + "y_train_predictions = model_XGB.predict(X_train)\n", + "training_error = mean_squared_error(y_train,y_train_predictions)\n", + "\n", + "\n", + "print(\"=== Confusion Matrix ===\")\n", + "print(confusion_matrix(y_test, y_test_predictions))\n", + "\n", + "print(\"=== Classification Report ===\")\n", + "print(classification_report(y_test, y_test_predictions))\n", + "\n", + "print(\"Training error: \",round(training_error,4))\n", + "print(\"Training accuracy: \", accuracy,\"%\")\n", + "print(\"Precision score: {}\".format(precision_score(y_test, y_test_predictions)))\n", + "print(\"Recall score: {}\".format(recall_score(y_test, y_test_predictions)))\n", + "print(\"F1 Score: {}\".format(f1_score(y_test, y_test_predictions)))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "=== Confusion Matrix ===\n", + "[[24 4]\n", + " [ 0 56]]\n", + "=== Classification Report ===\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 0.86 0.92 28\n", + " 1 0.93 1.00 0.97 56\n", + "\n", + " accuracy 0.95 84\n", + " macro avg 0.97 0.93 0.94 84\n", + "weighted avg 0.96 0.95 0.95 84\n", + "\n", + "Training error: 
0.0422\n", + "Training accuracy: 95.2381 %\n", + "Precision score: 0.9333333333333333\n", + "Recall score: 1.0\n", + "F1 Score: 0.9655172413793104\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BPdgFG7Da0Pw" + }, + "source": [ + "\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4UBShV3kRskG" + }, + "source": [ + "#

Parsing logs before processing into Drain3

\n", + "\n", + "Logs need to be parsed before they are processed by Drain3. This is necessary because Drain3 does not parse every log consistently.\n", + "\n", + "Things to be parsed out:\n", + "
    \n", + "
  • Dates
  • \n", + "
  • Timestamps
  • \n", + "
  • Newline characters
  • \n", + "
  • Version numbers
  • \n", + "
  • Namespace IDs
  • \n", + "
  • URLs
  • \n", + "
" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pfWCburJvXQA" + }, + "source": [ + "original_log = logs[1]\n", + "#print(original_log)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "aRyol0yU4HP_" + }, + "source": [ + "# helper function detecting if a string is a date / timestamp\n", + "parsed_logs = []\n", + "def is_date(str):\n", + " try:\n", + " dateutil.parser.parse(str)\n", + " return True\n", + " except:\n", + " return False\n", + "for i in range(len(logs)):\n", + " # splitting each section as its own index (for parsing)\n", + " parsed_logs.append(str(logs[i]).split(' '))\n", + " for j in range(len(parsed_logs[i])):\n", + " if is_date(parsed_logs[i][j]) == True:\n", + " parsed_logs[i][j] = ''\n", + " parsed_logs[i] = list(filter(lambda x: x != '', parsed_logs[i]))\n", + " parsed_logs[i] = ' '.join(parsed_logs[i])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tSINSnQeDqZ9" + }, + "source": [ + "This code cell removes timestamps and dates in order to mitigate unique identifiers" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "L8a8Nyc76PQo" + }, + "source": [ + "for index in range(len(parsed_logs)):\n", + " # removing version number\n", + " if \"version\" in parsed_logs[index]:\n", + " tmp = parsed_logs[index].split(\"version\",1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp\n", + "\n", + " # removing creating namespace ID\n", + " if \"Creating namespace\" in parsed_logs[index]:\n", + " tmp = parsed_logs[index].split(\"Creating namespace\", 1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp\n", + "\n", + " # removing using namespace ID\n", + " if \"Using namespace\" in parsed_logs[index]:\n", 
+ " tmp = parsed_logs[index].split(\"Using namespace\", 1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp\n", + "\n", + " # removing Imported release stamp\n", + " if \"Imported release\" in parsed_logs[index]:\n", + " tmp = parsed_logs[index].split(\"Imported release\", 1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp\n", + "\n", + " # removing Acquired lease stamp\n", + " if \"Acquired lease\" in parsed_logs[index]:\n", + " tmp = parsed_logs[index].split(\"Acquired lease\", 1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp\n", + "\n", + " # removing \"images will be pullable from\" link\n", + " if \"images will be pullable from\" in parsed_logs[index]:\n", + " tmp = parsed_logs[index].split(\"images will be pullable from\", 1)\n", + " tmp_1 = tmp[0]\n", + " tmp_2 = tmp[1].split()\n", + " tmp_2 = tmp_2[1:]\n", + " tmp = ''.join(tmp_1) + ' '.join(tmp_2)\n", + " parsed_logs[index] = tmp" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RDSHV7fZTOBT" + }, + "source": [ + "#

Drain3 Processing

\n", + "\n", + "The parsed logs are now processed by Drain3. Drain3 will do additional parsing and also cluster the logs. Drain3 uses longest common subsequence as its algorithm for clustering, so logs with similar structure will be placed in the same cluster. There are two dictionaries: one that records the size of each cluster and another that groups the logs by cluster ID. The goal of this is to determine which clusters are pass logs and which clusters are fail logs." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WtFxnlMOmp3W", + "outputId": "f461ceaa-2a1f-49d4-ede5-e9ac55abe0de" + }, + "source": [ + "\n", + "template_miner = TemplateMiner(None)\n", + "i = 0\n", + "ints_from_drain = []\n", + "while True:\n", + " if i >= len(parsed_logs):\n", + " break\n", + " log_line = ' '.join(parsed_logs[i])\n", + " i += 1\n", + " if log_line == 'q':\n", + " break\n", + " result = template_miner.add_log_message(log_line)\n", + " result_json = json.dumps(result)\n", + " ints_from_drain.append(re.findall(r'\\d+', result_json))\n", + " # print(result_json)\n", + "\n", + "ints_from_drain = np.asarray(ints_from_drain)\n", + "\n", + "cluster_size = {}\n", + "cluster_id = {}\n", + "for cluster in template_miner.drain.clusters:\n", + " if cluster.cluster_id not in cluster_size:\n", + " cluster_size[cluster.cluster_id] = cluster.size\n", + " cluster_id[cluster.cluster_id] = []\n", + "\n", + "\n", + "for i in range(len(ints_from_drain)):\n", + " cluster_id[int(ints_from_drain[i][0])].append(parsed_logs[i])\n", + "\n", + "print(cluster_size)\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "WARNING:drain3.template_miner_config:config file not found: drain3.ini\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "{1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 
1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1, 64: 1, 65: 1, 66: 1, 67: 1, 68: 1, 69: 1, 70: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 1, 76: 1, 77: 1, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 10, 91: 2, 92: 16, 93: 10, 94: 2, 95: 10, 96: 2, 97: 4, 98: 2, 99: 2, 100: 12, 101: 2, 102: 2, 103: 2, 104: 6, 105: 2, 106: 2, 107: 2, 108: 2, 109: 34, 110: 2, 111: 2, 112: 2, 113: 2, 114: 6, 115: 2, 116: 2, 117: 2, 118: 20, 119: 2, 120: 2, 121: 2, 122: 2, 123: 2, 124: 6, 125: 2, 126: 6, 127: 4, 128: 74, 129: 8, 130: 2, 131: 2, 132: 2, 133: 2, 134: 6, 135: 4, 136: 2, 137: 2, 138: 2, 139: 2, 140: 2, 141: 2, 142: 2, 143: 4, 144: 2, 145: 2, 146: 2, 147: 4, 148: 2, 149: 2}\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return array(a, dtype, copy=False, order=order)\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UlznByNoD3w1" + }, + "source": [ + "#

Parsing Results Print out

\n", + "\n", + "Here is a snippet example of what the parsing process does to each log. As you can see the leading byte tag, dates, and unique URLs will be parsed out from both manual parsing and Drain3." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6rqae9VQDaoS", + "outputId": "053fe8d2-9148-491d-99a8-4f21ab5ac999" + }, + "source": [ + "print(original_log.split(',')[:3])\n", + "print(parsed_logs[1].split(',')[:3])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['b\\'2020/10/28 22:31:45 ci-operator version v20201028-4f6c4ca\"', \" '2020/10/28 22:31:45 No source defined'\", \" '2020/10/28 22:31:45 Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.5-ci'\"]\n", + "[\"b'2020/10/28 ci-operator No source defined'\", \" Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.5-ci'\", ' Running [release-inputs]']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aMuEAdZvVZUX" + }, + "source": [ + "#

Log cluster example

\n", + "\n", + "Here is a printout of the logs that fall under the cluster with the ID of 95. With 10 logs, this is one of the larger clusters. Based on the clustering result, it is evident that Drain3 uses longest common subsequence, because all of the logs in this cluster share the exact same structure with very small differences, especially after being parsed." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Rx-UE8x2VXRg", + "outputId": "d9be061b-1a75-49c6-8f45-6168cd442339" + }, + "source": [ + "i = 1\n", + "for log in cluster_id[95]:\n", + " print(\"log\", i, \":\", log[:200], \"...\")\n", + " print()\n", + " i += 1" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "log 1 : b'2020/11/10 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 2 : b'2020/11/11 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 3 : b'2020/11/12 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 4 : b'2020/11/13 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 5 : b'2020/11/15 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 6 : b'2020/11/10 ci-operator No source defined', 
Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 7 : b'2020/11/11 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 8 : b'2020/11/12 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 9 : b'2020/11/13 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n", + "log 10 : b'2020/11/15 ci-operator No source defined', Resolved release latest to registry.svc.ci.openshift.org/ocp/release:4.1', warning: overriding parameter \"LEASED_RESOURCE\"', Running [release-inputs], e2e- ...\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yMgfiryITDZ5" + }, + "source": [ + "#

Classifying logs with Drain3 and parsing

" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zm8de6q1RvAx", + "outputId": "266cdb52-5b5e-4caf-eb2e-1ab729a05db7" + }, + "source": [ + "vocab = {}\n", + "i = 0\n", + "hhh = []\n", + "for log in parsed_logs:\n", + " log = str(log).split(\",\")\n", + " for line in log:\n", + " line = line.replace(\"'\",\"\").replace(\" \",\"\").replace(\"\\\"\",\"\")\n", + " if line not in vocab:\n", + " vocab[line] = i\n", + " i+=1\n", + "print(len(vocab))\n", + "# print(hhh)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "27820\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "feNguNBnV7c_" + }, + "source": [ + "

Vectorizing log lines

" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M-7kd8rRR4qL" + }, + "source": [ + "vectorizer = TfidfVectorizer(vocabulary = vocab)\n", + "X2 = vectorizer.fit_transform(parsed_logs)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LEYrmPmGWeK3", + "outputId": "d778b6ce-f857-4cc4-ceaa-18ffc8ff7e34" + }, + "source": [ + "print(X2.shape)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(416, 27820)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gfJt0J1bR9ir" + }, + "source": [ + "sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, train_size= 0.8, random_state=1)\n", + "sss.get_n_splits(X2, y)\n", + "y = np.array(y)\n", + "for train_index, test_index in sss.split(X2, y):\n", + " X_train2, X_test2 = X2[train_index], X2[test_index]\n", + " y_train, y_test = y[train_index], y[test_index]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fdOevl5l1Hxt", + "outputId": "6740e176-0687-4842-c4e7-8c2b77e07ac6" + }, + "source": [ + "#tfidf\n", + "X_train2.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(332, 27820)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 52 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6tvepY3kYhXJ" + }, + "source": [ + "# Classifying logs with Drain3 with XGB" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "avuHz_QVYl3_", + "outputId": "aa333d26-4768-4d83-a547-2fa632c22902" + }, + "source": [ + "model_XGB_D = XGBClassifier(scale_pos_weight=99).fit(X_train2, y_train)\n", + "y_test_predictions = model_XGB_D.predict(X_test2)\n", + 
"accuracy = accuracy_cal(y_test_predictions, y_test)\n", + "\n", + "y_train_predictions = model_XGB_D.predict(X_train2)\n", + "training_error = mean_squared_error(y_train,y_train_predictions)\n", + "\n", + "\n", + "print(\"=== Confusion Matrix ===\")\n", + "print(confusion_matrix(y_test, y_test_predictions))\n", + "\n", + "print(\"=== Classification Report ===\")\n", + "print(classification_report(y_test, y_test_predictions))\n", + "\n", + "\n", + "print(\"Training error: \",round(training_error,4))\n", + "print(\"Training accuracy: \", accuracy,\"%\")\n", + "print(\"Precision score: {}\".format(precision_score(y_test, y_test_predictions)))\n", + "print(\"Recall score: {}\".format(recall_score(y_test, y_test_predictions)))\n", + "print(\"F1 Score: {}\".format(f1_score(y_test, y_test_predictions)))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "=== Confusion Matrix ===\n", + "[[21 7]\n", + " [ 0 56]]\n", + "=== Classification Report ===\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 0.75 0.86 28\n", + " 1 0.89 1.00 0.94 56\n", + "\n", + " accuracy 0.92 84\n", + " macro avg 0.94 0.88 0.90 84\n", + "weighted avg 0.93 0.92 0.91 84\n", + "\n", + "Training error: 0.0753\n", + "Training accuracy: 91.6667 %\n", + "Precision score: 0.8888888888888888\n", + "Recall score: 1.0\n", + "F1 Score: 0.9411764705882353\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sTiDDkU5Oqy8" + }, + "source": [ + "#Visualizations\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IuBApuTZEjjW" + }, + "source": [ + "k = [100,200,300,400,416]\n", + "#k = [200,400,600,800,868]\n", + "#k = [700,1400,2100,2800,3500,3826]\n", + "#k = [500,1000,1500,2000,2500,3000,3500,3994]\n", + "sdp=[]\n", + "snp=[]\n", + "sdf1=[]\n", + "snf1=[]\n", + "acc_n = []\n", + "acc_d = []\n", + "X[0:k[0]].shape\n", + "for i in range(len(k)):\n", + " DrainX = X[0:k[i]]\n", + " NoDrainX = 
X2[0:k[i]]\n", + " lb = y[0:k[i]]\n", + " sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, train_size= 0.8, random_state=1)\n", + " sss.get_n_splits(DrainX, lb)\n", + " lb = np.array(lb)\n", + " for train_index, test_index in sss.split(DrainX, lb):\n", + " Xd_train, Xd_test = DrainX[train_index], DrainX[test_index] \n", + " Xn_train, Xn_test = NoDrainX[train_index], NoDrainX[test_index] \n", + " y_train, y_test = lb[train_index], lb[test_index]\n", + "\n", + " model_XGB_D = XGBClassifier(scale_pos_weight=99).fit(Xd_train, y_train) \n", + " yd_test_predictions = model_XGB_D.predict(Xd_test)\n", + " sd_pre = precision_score(y_test, yd_test_predictions)\n", + " sdp.append(sd_pre)\n", + " sd_f1 = f1_score(y_test, yd_test_predictions)\n", + " sdf1.append(sd_f1)\n", + " acc_d.append(accuracy_cal(yd_test_predictions, y_test))\n", + "\n", + " model_XGB = XGBClassifier(scale_pos_weight=99).fit(Xn_train, y_train) \n", + " yn_test_predictions = model_XGB.predict(Xn_test)\n", + " sn_pre = precision_score(y_test, yn_test_predictions)\n", + " snp.append(sn_pre)\n", + " sn_f1 = f1_score(y_test, yn_test_predictions)\n", + " snf1.append(sn_f1)\n", + " acc_n.append(accuracy_cal(yn_test_predictions, y_test))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 513 + }, + "id": "r7TIRdELWein", + "outputId": "27e4b6e4-c7ad-4edf-c793-a7294b2bc6e9" + }, + "source": [ + "fig = plt.figure(figsize=(6,8))\n", + "\n", + "fig.subplots_adjust(hspace=1.4, wspace=1.4)\n", + "ax = fig.add_subplot(3, 1, 1)\n", + "ax.plot(k, acc_d, label = \"XGB_Drain\")\n", + "ax.plot(k, acc_n, label = \"XGB\")\n", + "plt.xlabel(\"number of data\")\n", + "plt.ylabel(\"accuracy\")\n", + "plt.title('Accuracy of classification for log data with and without processing')\n", + "plt.legend()\n", + "\n", + "ax = fig.add_subplot(3, 1, 2)\n", + "ax.plot(k, sdp, label = \"XGB\")\n", + "ax.plot(k, snp, label = 
\"XGB_Drain\")\n", + "plt.xlabel(\"number of data\")\n", + "plt.ylabel(\"precision score\")\n", + "plt.title('Precision score of classification for log data with and without processing')\n", + "plt.legend()\n", + "\n", + "ax = fig.add_subplot(3, 1, 3)\n", + "ax.plot(k, sdf1, label = \"XGB_Drain\")\n", + "ax.plot(k, snf1, label = \"XGB\")\n", + "plt.xlabel(\"number of data\")\n", + "plt.ylabel(\"F1 score\")\n", + "plt.title('F1 score of classification for log data with and without processing')\n", + "plt.legend()\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hth_ioBpm7_7" + }, + "source": [ + "#

Cross validation

\n", + "The code below is used to verify whether our model overfits as the dataset size changes.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iZbqDPOQjdTK" + }, + "source": [ + "k = [100,200,300,400,416]\n", + "#k = [200,400,600,800,868]\n", + "#k = [700,1400,2100,2800,3500,3826]\n", + "#k = [500,1000,1500,2000,2500,3000,3500,3994]\n", + "scores_d = [] #Drain\n", + "scores_s = [] #No Drain\n", + "for i in range(len(k)):\n", + " DrainX = X[0:k[i]]\n", + " NoDrainX = X2[0:k[i]]\n", + " lb = y[0:k[i]]\n", + " cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)\n", + " # evaluate model\n", + " scores_d.append(np.mean(cross_val_score(model_XGB_D, DrainX, lb, scoring='roc_auc', cv=cv, n_jobs=-1)))\n", + " \n", + " cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)\n", + " # evaluate model\n", + " scores_s.append(np.mean(cross_val_score(model_XGB, NoDrainX, lb, scoring='roc_auc', cv=cv, n_jobs=-1)))\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "faXyTLBlj8wE", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 313 + }, + "outputId": "9840fb21-eae1-4bc6-defc-b9feb70d85d9" + }, + "source": [ + "#fig = plt.figure(figsize=(6,8))\n", + "\n", + "\n", + "plt.plot(k, scores_d, label = \"XGB\")\n", + "plt.plot(k, scores_s, label = \"XGB_Drain\")\n", + "plt.xlabel(\"number of data\")\n", + "plt.ylabel(\"roc_auc\")\n", + "plt.title('Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores')\n", + "plt.legend()\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 59 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/RedHatNLP/Final_WebScraping.ipynb b/RedHatNLP/Final_WebScraping.ipynb new file mode 100644 index 00000000..4808c0d7 --- /dev/null +++ b/RedHatNLP/Final_WebScraping.ipynb @@ -0,0 +1,506 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Final_WebScraping", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "P-SchSDwNCgQ" + }, + "source": [ + "

Web Scraping for NLP Project

\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Bq5k5nqiC02i" + }, + "source": [ + "from urllib.request import urlopen\n", + "from bs4 import BeautifulSoup\n", + "from google.colab import files\n", + "import pandas as pd\n", + "import io\n", + "import numpy as np\n", + "import dateutil\n", + "from dateutil import parser\n", + "import textblob\n", + "import requests\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import StratifiedShuffleSplit\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import precision_score\n", + "from sklearn.metrics import recall_score\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "\n", + "\n", + "\n", + "import json\n", + "import logging\n", + "import sys\n", + "import re\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import StratifiedShuffleSplit\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "import subprocess\n", + "import time\n", + "import spacy\n", + "import pickle\n", + "from xgboost import XGBClassifier" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kE517G6j_3Su", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "78e7e923-3795-40ab-96da-860ce5f830c0" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + 
"text": [ + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iq11lna0RAee" + }, + "source": [ + "#

Web scraping

\n", + "\n", + "Gathers OpenShift using BeautifulSoup and then returns an array containing all of the logs." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XHDHu0nR9RFe" + }, + "source": [ + "#log_file_1\n", + "base1 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/canary-release-openshift-origin-installer-e2e-aws-4.5-cnv/\"\n", + "base2 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/release-openshift-ocp-installer-e2e-aws-serial-4.1/\"\n", + "base3 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/release-openshift-ocp-installer-e2e-aws-serial-4.3/\"\n", + "\n", + "#log_file_2\n", + "base4 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-open-cluster-management-governance-policy-propagator-main-images/\"\n", + "base5 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-open-cluster-management-console-release-2.3-images/\"\n", + "base6 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-kubevirt-hyperconverged-cluster-operator-release-4.9-images/\"\n", + "base7=\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-openshift-tektoncd-triggers-release-next-4.5-images/\"\n", + "base8=\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-openshift-local-storage-operator-master-images/\"\n", + "base9=\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.5-e2e-aws-upgrade-rollback/\"\n", + "\n", + "#log_file_3\n", + "base10 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-codeready-toolchain-host-operator-master-test/\"\n", + "base11 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-integr8ly-integreatly-operator-3scale-next-0.7.0-images/\"\n", + 
"base12 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/branch-ci-open-cluster-management-multicloud-operators-foundation-master-fast-forward/\"\n", + "\n", + "#log_file_4\n", + "base13 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-red-hat-data-services-opendatahub-operator-master-modh-operator-e2e-nightly/\"\n", + "base14 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-sdn-multitenant-4.6/\"\n", + "\n", + "#log_file_5\n", + "base15 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ocp-4.5-ci-e2e-44-stable-to-45-ci/\"\n", + "base16 =\"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ocp-4.7-e2e-metal-assisted-onprem/\"\n", + "base17 = \"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-operator-framework-operator-lifecycle-managment-rhoperator-metric-e2e-aws-olm-release-4.4-daily/\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "RT1xSMJ40ewc", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5b979c6b-1fb2-441f-e09a-d776a072d3cf" + }, + "source": [ + "# Web scraping \n", + "# BeautifulSoup for web scraping\n", + "#url core needed to pull\n", + "website = \"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com\"\n", + "\n", + "base = base15\n", + "ending = \"build-log.txt\"\n", + "finished = \"finished.json\"\n", + "page = requests.get(base) \n", + "data = page.text\n", + "soup = BeautifulSoup(data)\n", + "links = []\n", + "for link in soup.find_all('a'):\n", + " links.append(link.get('href'))\n", + "links = links[1:-1]\n", + "\n", + "final_array = []\n", + "labels_link = []\n", + "# create array of urls\n", + "for x in range(len(links)):\n", + " 
final_array.append(str(website) + str(links[x]) + str(ending))\n", + " labels_link.append(str(website) + str(links[x]) + str(finished))\n", + "\n", + "# pull all urls logs and store in 2-d array where array_of_logs[x] is a build-log file and \n", + "# array_of_logs[x][y] is an individual log line split by new line\n", + "\n", + "array_of_logs1 = []\n", + "for x in range(len(final_array)):\n", + " page = urlopen(final_array[x])\n", + " html_bytes = page.read()\n", + " array_of_logs1.append(str(html_bytes).split('\\\\n'))\n", + "\n", + "print(len(array_of_logs1))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "490\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IgNDOUFBG4zX", + "outputId": "d7251cab-a0a4-4d43-c207-2ac948d40b3d" + }, + "source": [ + "# Web scraping \n", + "# BeautifulSoup for web scraping\n", + "#url core needed to pull\n", + "website = \"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com\"\n", + "\n", + "base = base16\n", + "ending = \"build-log.txt\"\n", + "finished = \"finished.json\"\n", + "page = requests.get(base) \n", + "data = page.text\n", + "soup = BeautifulSoup(data)\n", + "links = []\n", + "for link in soup.find_all('a'):\n", + " links.append(link.get('href'))\n", + "links = links[1:-1]\n", + "\n", + "final_array = []\n", + "labels_link2 = []\n", + "# create array of urls\n", + "for x in range(len(links)):\n", + " final_array.append(str(website) + str(links[x]) + str(ending))\n", + " labels_link2.append(str(website) + str(links[x]) + str(finished))\n", + "\n", + "# pull all urls logs and store in 2-d array where array_of_logs[x] is a build-log file and \n", + "# array_of_logs[x][y] is an individual log line split by new line\n", + "\n", + "array_of_logs2 = []\n", + "for x in range(len(final_array)):\n", + " page = urlopen(final_array[x])\n", + " html_bytes = page.read()\n", + " 
array_of_logs2.append(str(html_bytes).split('\\\\n'))\n", + "\n", + "print(len(array_of_logs2))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "247\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bu6M41HEHbHm", + "outputId": "91c0b7eb-c71c-4675-f3a1-867669ad40c4" + }, + "source": [ + "# Web scraping \n", + "# BeautifulSoup for web scraping\n", + "#url core needed to pull\n", + "website = \"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com\"\n", + "\n", + "base = base17\n", + "ending = \"build-log.txt\"\n", + "finished = \"finished.json\"\n", + "page = requests.get(base) \n", + "data = page.text\n", + "soup = BeautifulSoup(data)\n", + "links = []\n", + "for link in soup.find_all('a'):\n", + " links.append(link.get('href'))\n", + "links = links[1:-1]\n", + "\n", + "final_array = []\n", + "labels_link3 = []\n", + "# create array of urls\n", + "for x in range(len(links)):\n", + " final_array.append(str(website) + str(links[x]) + str(ending))\n", + " labels_link3.append(str(website) + str(links[x]) + str(finished))\n", + "\n", + "# pull all urls logs and store in 2-d array where array_of_logs[x] is a build-log file and \n", + "# array_of_logs[x][y] is an individual log line split by new line\n", + "\n", + "array_of_logs3 = []\n", + "for x in range(len(final_array)):\n", + " page = urlopen(final_array[x])\n", + " html_bytes = page.read()\n", + " array_of_logs3.append(str(html_bytes).split('\\\\n'))\n", + "\n", + "print(len(array_of_logs3))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "182\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LEvrmTCnTu-S", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "29bb768b-0866-4f39-84f2-c73361e0bb40" + }, + "source": [ + "array_of_logs = array_of_logs1 #+ array_of_logs2 #+ 
array_of_logs3\n", + "print(len(array_of_logs))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "490\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WGNoRQkn7j3E" + }, + "source": [ + "# IMPORTANT\n", + "\n", + "Run the cell below a few times in a row so that it completely removes everything it is supposed to remove." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JsfDoHsxQmqH", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 232 + }, + "outputId": "0d58766a-7484-4d57-fdd7-1dc32fe16f62" + }, + "source": [ + "\n", + "for i in range(len(array_of_logs)):\n", + " # removing newline characters\n", + " array_of_logs[i] = str(array_of_logs[i]).replace('\\\\n', ' ')\n", + " \n", + " # removes leading 'b from log\n", + " array_of_logs[i] = array_of_logs[i][2:]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;31m# removing newline characters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0marray_of_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mstr\u001b[0;34m(\u001b[0m\u001b[0marray_of_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\\\n'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'array_of_logs' is not defined" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OqeBizbVQn6m" + }, + "source": [ + "The code cell above removes newline characters and the leading byte-string prefix (`b'`) from each log." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "heDQxXYAQzcU" + }, + "source": [ + "

Assigning Labels to Logs

" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nX15zcw5v8_z", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b072cc2f-d38c-4f5a-dabe-10ac71f41793" + }, + "source": [ + "labels= []\n", + "index = 0\n", + "to_remove = []\n", + "\n", + "\n", + "for x in range(len(labels_link)):\n", + " page = urlopen(labels_link[x])\n", + " try:\n", + " data = json.load(page) \n", + " except:\n", + " continue\n", + " labels.append(data[\"result\"])\n", + " index += 1\n", + "\n", + "# for x in range(len(labels_link2)):\n", + "# page = urlopen(labels_link2[x])\n", + "# try:\n", + "# data = json.load(page)\n", + "# labels.append(data[\"result\"])\n", + "# except:\n", + "# to_remove.append(index)\n", + "# continue\n", + "# index += 1\n", + "\n", + "# for x in range(len(labels_link3)):\n", + "# page = urlopen(labels_link3[x])\n", + "# try:\n", + "# data = json.load(page)\n", + "# labels.append(data[\"result\"])\n", + "# except:\n", + "# to_remove.append(index)\n", + "# continue\n", + "# index += 1\n", + "\n", + "\n", + "for i in range(len(array_of_logs)):\n", + " if i in to_remove:\n", + " array_of_logs[i] = None\n", + "\n", + "array_of_logs = [log for log in array_of_logs if log != None]\n", + "print(len(labels))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "489\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WflHVjf8LQuc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 181 + }, + "outputId": "63dc2308-b86e-4c23-a59a-d66b6e089d47" + }, + "source": [ + "with open(\"/content/drive/MyDrive/RedHat_BU/log/log_file_5.ob\", 'wb') as fp:\n", + " pickle.dump(array_of_logs,fp)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "FileNotFoundError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + 
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/drive/MyDrive/RedHat_BU/log/log_file_5.ob\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_logs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/content/drive/MyDrive/RedHat_BU/log/log_file_5.ob'" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uwk_jjWOR8rs" + }, + "source": [ + "with open(\"/content/drive/MyDrive/RedHat_BU/label/label_file_5.ob\", 'wb') as fp:\n", + " pickle.dump(labels,fp)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file