|
3 | 3 |
|
4 | 4 | import markdown |
5 | 5 |
|
6 | | - |
7 | 6 | REGULAR_EXP = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+" |
8 | 7 |
|
9 | | -LINKS = namedtuple("LINKS", ["line", "urls", "skip"]) |
| 8 | +LINKS = namedtuple("LINKS", ["line", "urls", "skip", "valid"]) |
10 | 9 |
|
11 | 10 |
|
12 | 11 | def parse_line(line): |
@@ -42,5 +41,37 @@ def parse_file(file_path): |
42 | 41 | line_links = parse_line(line) |
43 | 42 | if line_links: |
44 | 43 | skip = True if "noqa" in line else False |
45 | | - links.append(LINKS(line=line_number + 1, urls=line_links, skip=skip)) |
| 44 | + links.append(LINKS(line=line_number + 1, urls=line_links, skip=skip, valid=False)) |
46 | 45 | return links |
| 46 | + |
| 47 | + |
def link_validator(links_list):
    """Split each LINKS entry's URLs into valid and invalid entries.

    Every URL in every entry is matched against a URL-shape regex
    (http/https/ftp/ftps scheme, then a domain name, ``localhost`` or a
    dotted-quad IP, an optional port, and an optional path/query).

    Args:
        links_list: Iterable of LINKS named tuples
            (``line``, ``urls``, ``skip``, ``valid``).

    Returns:
        list: New LINKS entries — one per invalid URL (``valid=False``,
        ``skip=True``), plus one entry per source line that contained at
        least one valid URL (``valid=True``, original ``skip`` preserved).
    """
    url_pattern = re.compile(
        r"^(?:http|ftp)s?://"  # scheme: http:// https:// ftp:// ftps://
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"
        # domain name
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or IPv4 address
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",  # optional path / query string
        re.IGNORECASE,
    )

    validated_list = []
    for link in links_list:
        valid_urls = []
        for url in link.urls:
            # Use the compiled pattern's own match() (idiomatic for compiled regexes).
            if url_pattern.match(url):
                valid_urls.append(url)
            else:
                # Each invalid URL gets its own entry so it can be reported individually.
                validated_list.append(
                    LINKS(line=link.line, urls=[url], skip=True, valid=False)
                )
        # Bug fix: only emit a "valid" entry when at least one URL matched —
        # the original appended LINKS(urls=[], valid=True) for lines whose
        # URLs were all invalid.
        if valid_urls:
            # Bug fix: preserve the noqa/skip flag parsed upstream instead of
            # hard-coding skip=False, which silently discarded it.
            validated_list.append(
                LINKS(line=link.line, urls=valid_urls, skip=link.skip, valid=True)
            )
    return validated_list
0 commit comments