-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.py
More file actions
101 lines (78 loc) · 3.32 KB
/
parse.py
File metadata and controls
101 lines (78 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from scrape import scrape, generateColors
from requisite import parseReqs
import re
import json
from pprint import pprint
def nodeIsInvalid(node, desc, data) -> bool:
    """Report whether a course should be skipped instead of becoming a node.

    A course is rejected when its description is the literal placeholder
    "No description" (skipped silently), or when ``data["nodes"]`` already
    holds a node with the same title (a notice is printed).

    Args:
        node: Mapping for the course; only ``node["title"]`` is read.
        desc: Description text of the course.
        data: Graph under construction; ``data["nodes"]`` is scanned for
            an entry with the same ``"title"``.

    Returns:
        True if the course must be skipped, False if it may be added.
    """
    if desc == "No description":
        return True

    title = node["title"]
    # any() stops at the first duplicate instead of collecting every match.
    if any(existing["title"] == title for existing in data["nodes"]):
        print("Already have nodes for: " + title + ". Skipping node")
        return True
    return False
# Everything after a requisite label, up to (not including) the next period
# or the end of the text. Anchoring on "$" rather than ".$" keeps the final
# character when the description does not end with a period.
_REQUISITE_TAIL = r".*?(?=\.|$)"


def _buildRequisitePattern(stem):
    """Compile a regex that captures the text following a requisite label.

    The recognized labels are "<stem>s: ", "<stem>: ", "<stem> " and
    "<stem>s ". Each label needs its own alternative because a look-behind
    must have a fixed width.
    """
    labels = (stem + "s: ", stem + ": ", stem + " ", stem + "s ")
    return re.compile(
        "|".join("(?<=" + label + ")" + _REQUISITE_TAIL for label in labels)
    )


_PREREQ_PATTERN = _buildRequisitePattern("Prerequisite")
_COREQ_PATTERN = _buildRequisitePattern("Corequisite")


def generateLinks(node, desc, data) -> None:
    """Create prerequisite and corequisite links for a course node.

    The description is searched for the first prerequisite label and the
    first corequisite label. The text captured after each label is handed
    to ``parseReqs`` together with the course title, a kind flag ("R" for
    prerequisites, "C" for corequisites) and ``data``. Prerequisites are
    processed before corequisites when both are present.

    Args:
        node: Mapping for the course; only ``node["title"]`` is read.
        desc: Description text to search for requisite labels.
        data: Graph under construction, passed through to ``parseReqs``.
    """
    prereqs = _PREREQ_PATTERN.search(desc)
    coreqs = _COREQ_PATTERN.search(desc)

    # The two kinds are independent, so each is handled on its own.
    if prereqs:
        parseReqs(prereqs.group(), node["title"], "R", data)
    if coreqs:
        parseReqs(coreqs.group(), node["title"], "C", data)
def generateNode(course, desc, data, colorMap, nodeIdCounter):
    """Append a graph node for ``course`` unless the course must be skipped.

    The subject code is the part of the title that precedes a space
    followed by a digit; it selects the node color from ``colorMap``.

    Args:
        course: Mapping for the course; ``course["title"]`` is read.
        desc: Description text stored on the node.
        data: Graph under construction; the node goes into ``data["nodes"]``.
        colorMap: Lookup from subject code to color.
        nodeIdCounter: Value stored as the new node's ``"id"``.

    Returns:
        True when a node was appended, False when ``nodeIsInvalid``
        rejected the course.

    Raises:
        AttributeError: If the title contains no space followed by a
            digit, because the subject search then yields ``None``.
    """
    if nodeIsInvalid(course, desc, data):
        return False

    title = course["title"]
    subjectMatch = re.search(r".*(?= \d)", title)
    nodeColor = colorMap[subjectMatch.group()]

    newNode = {
        "id": nodeIdCounter,
        "title": title,
        "desc": desc,
        "color": nodeColor,
    }
    data["nodes"].append(newNode)
    return True
def processData(scrapeData, colorMap, subjects) -> dict:
    """Build the graph dictionary from the scraped course records.

    Every record has runs of two or more spaces in its description
    collapsed to a single space. Records accepted by ``generateNode``
    receive consecutive ids starting at 0 and are then passed to
    ``generateLinks``; rejected records consume no id.

    Args:
        scrapeData: Iterable of course mappings; ``course["desc"]`` is
            read here and the mapping is forwarded to the helpers.
        colorMap: Lookup from subject code to color, forwarded to
            ``generateNode``.
        subjects: Stored unchanged under the ``"subjects"`` key.

    Returns:
        A dict with the keys ``"nodes"``, ``"links"``, ``"subjects"``
        and ``"noLinks"``.
    """
    graph = {"nodes": [], "links": [], "subjects": subjects, "noLinks": []}

    nextId = 0
    for course in scrapeData:
        cleanedDesc = re.sub(" {2,}", " ", course["desc"])
        if not generateNode(course, cleanedDesc, graph, colorMap, nextId):
            # Skipped course: leave the id available for the next node.
            continue
        generateLinks(course, cleanedDesc, graph)
        nextId += 1

    return graph
def labelLinklessNodes(data) -> None:
    """Record the titles of nodes that take part in no link.

    A node is linkless when its ``"id"`` appears as neither the
    ``"source"`` nor the ``"target"`` of any entry in ``data["links"]``.
    Titles are appended to ``data["noLinks"]`` in node order.

    Args:
        data: Graph dictionary with ``"nodes"``, ``"links"`` and
            ``"noLinks"`` entries; ``"noLinks"`` is extended in place.
    """
    # Collect every linked id once, so each node costs a single set lookup
    # instead of two scans over all links.
    # NOTE(review): assumes link endpoints are hashable (ints or strings).
    linkedIds = set()
    for link in data["links"]:
        linkedIds.add(link["target"])
        linkedIds.add(link["source"])

    for node in data["nodes"]:
        if node["id"] not in linkedIds:
            data["noLinks"].append(node["title"])
def save(filePath, data):
    """Write ``data`` to ``filePath`` as JSON.

    The file is opened in write mode, so any existing content is replaced.

    Args:
        filePath: Destination path of the JSON file.
        data: JSON-serializable object to write.
    """
    with open(filePath, "w") as handle:
        json.dump(data, handle)
def _loadCachedJson(path):
    """Read and decode the JSON document stored at ``path``."""
    with open(path, "r") as handle:
        return json.load(handle)


def main():
    """Turn the cached scrape output into ``./courses.json``.

    Loads the cached course records and subject list, generates the
    subject colors, builds the graph, labels the nodes that have no
    links, and saves the result.
    """
    # The direct scrape() call is disabled; both inputs come from ./cache.
    scrapeRaw = _loadCachedJson("./cache/scrape-raw.json")
    subjects = _loadCachedJson("./cache/subjects.json")

    # TODO: Not enough color combinations, so several subjects need to share
    # a color. Could they be grouped by how closely they are connected?
    print("Generating colors")
    colorMap = generateColors(subjects=subjects)

    print("Processing data")
    courseGraph = processData(scrapeRaw, colorMap, subjects)
    labelLinklessNodes(courseGraph)
    save("./courses.json", courseGraph)


if __name__ == "__main__":
    main()