Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions logparser/Drain/Drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
License : MIT
"""

import re
import regex as re
import os
import numpy as np
import pandas as pd
import hashlib
from datetime import datetime

import math
from collections import Counter

class Logcluster:
def __init__(self, logTemplate='', logIDL=None):
Expand Down Expand Up @@ -212,7 +213,8 @@ def outputResult(self, logClustL):
self.df_log['EventId'] = log_templateids
self.df_log['EventTemplate'] = log_templates

if self.keep_para:
# if self.keep_para:
if log_templates:
self.df_log["ParameterList"] = self.df_log.apply(self.get_parameter_list, axis=1)
self.df_log.to_csv(os.path.join(self.savePath, self.logName + '_structured.csv'), index=False)

Expand Down Expand Up @@ -332,6 +334,15 @@ def generate_logformat_regex(self, logformat):
regex = re.compile('^' + regex + '$')
return headers, regex

def entropy(self, string):
    """Summarise a parameter string as a single entropy figure.

    Computes the Shannon entropy (logarithm base 10) of the character
    frequency distribution of ``string`` and rounds it to 5 decimal
    places. The result is a lossy numeric summary of the text, not a
    reversible encoding.

    :param string: the string parameter to summarise
    :return: the rounded entropy as returned by ``np.round``; 0 when
        ``string`` is empty, since there are no characters to sum over
    """
    total_chars = float(len(string))
    char_counts = Counter(string)

    # Accumulate p * log10(p) over every distinct character; the sign is
    # flipped afterwards so the reported entropy is non-negative.
    weighted_logs = 0
    for occurrences in char_counts.values():
        probability = occurrences / total_chars
        weighted_logs += probability * math.log(probability, 10)

    return np.round(-weighted_logs, 5)

def get_parameter_list(self, row):
template_regex = re.sub(r"<.{1,5}>", "<*>", row["EventTemplate"])
if "<*>" not in template_regex: return []
Expand All @@ -341,4 +352,5 @@ def get_parameter_list(self, row):
parameter_list = re.findall(template_regex, row["Content"])
parameter_list = parameter_list[0] if parameter_list else ()
parameter_list = list(parameter_list) if isinstance(parameter_list, tuple) else [parameter_list]
return parameter_list
parameter_list = [self.entropy(string) for string in parameter_list]
return parameter_list
5 changes: 3 additions & 2 deletions logparser/SHISO/SHISO.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import re
import os
import time
import multiprocessing as mp
from nltk import ngrams
from Queue import *
# from Queue import *
import numpy as np
import pandas as pd
import hashlib
Expand Down Expand Up @@ -293,7 +294,7 @@ def Adjust(self, pn, nidx, n):

def outputResult(self, node):
templateNo = 1
nodeQ = Queue()
nodeQ = mp.Queue()
nodeQ.put(node)

templates = [0] * self.df_log.shape[0]
Expand Down