Skip to content

Commit 1ba75ef

Browse files
amolkahat authored and raukadah committed
Added link validator. (#13)
- It checks whether a link is valid or not. - If a link looks like `https://<hostname>:<port>`, it is marked as skipped. Signed-off-by: Amol Kahat <amolkahat@gmail.com>
1 parent 9cb9aa2 commit 1ba75ef

File tree

3 files changed

+44
-4
lines changed

3 files changed

+44
-4
lines changed

linkstatus/linkstatus.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import click
66
import requests
77

8+
from linkstatus.parser import link_validator
89
from linkstatus.parser import parse_file
910

1011

@@ -61,7 +62,7 @@ def main(source, recursive, timeout, retry):
6162

6263
for f in files:
6364
links = parse_file(f)
64-
65+
links = link_validator(links)
6566
if links:
6667
click.echo(click.style("Links in File: '{}'".format(f), bg="blue", fg="white"))
6768

linkstatus/parser.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33

44
import markdown
55

6-
76
REGULAR_EXP = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
87

9-
LINKS = namedtuple("LINKS", ["line", "urls", "skip"])
8+
LINKS = namedtuple("LINKS", ["line", "urls", "skip", "valid"])
109

1110

1211
def parse_line(line):
@@ -42,5 +41,37 @@ def parse_file(file_path):
4241
line_links = parse_line(line)
4342
if line_links:
4443
skip = True if "noqa" in line else False
45-
links.append(LINKS(line=line_number + 1, urls=line_links, skip=skip))
44+
links.append(LINKS(line=line_number + 1, urls=line_links, skip=skip, valid=False))
4645
return links
46+
47+
48+
def link_validator(links_list):
    """Split the URLs of parsed links into valid and invalid entries.

    Every URL is matched against a pattern that accepts an ``http``,
    ``https``, ``ftp`` or ``ftps`` scheme followed by a domain name,
    ``localhost`` or a dotted four-number address, then an optional port
    and an optional path/query. The numeric address parts are not
    range-checked.

    Args:
        links_list: Iterable of ``LINKS`` tuples (``line``, ``urls``,
            ``skip``, ``valid``).

    Return:
        List of ``LINKS`` tuples. For each input entry, every URL that does
        not match the pattern gets its own entry with ``valid=False`` and
        ``skip=True``; these are followed by one entry holding the matching
        URLs with ``valid=True`` and the ``skip`` flag of the input entry.
    """
    validated_list = []

    regex = re.compile(
        r"^(?:http|ftp)s?://"  # http://, https://, ftp:// or ftps://
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"
        # for domain
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",
        re.IGNORECASE,
    )

    for link in links_list:
        valid_urls = []
        for url in link.urls:
            if regex.match(url):
                valid_urls.append(url)
            else:
                # A malformed URL (e.g. a `<hostname>:<port>` placeholder)
                # is reported on its own and marked to be skipped.
                validated_list.append(
                    LINKS(line=link.line, urls=[url], valid=False, skip=True)
                )
        # Keep the skip flag of the incoming entry instead of forcing it to
        # False, so that a skip request set upstream is not lost here.
        validated_list.append(
            LINKS(line=link.line, urls=valid_urls, skip=link.skip, valid=True)
        )
    return validated_list

tests/dir/links_markdown.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,11 @@ Some text to show that the reference links can follow later.
3232
[link text itself]: http://www.reddit.com <!--noqa-->
3333

3434
[broken link](https://github.com/pythonpune/linkstatus)
35+
36+
https://github.com//pythonpune/
37+
38+
http://<hostname>:<port>
39+
40+
https://<hostname>:<port>/pages
41+
42+
file:///etc/hosts

0 commit comments

Comments
 (0)