utils.py (2919B)
import ipaddress
import re

try:
    from types import UnicodeType
except ImportError:
    UnicodeType = str

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse


# A label consisting only of ASCII digits (used to reject numeric TLDs).
numeric = re.compile(r'[0-9]+$')
# A single hostname label: 1-63 letters/digits/hyphens, with no hyphen at
# either end.
allowed = re.compile(r'(?!-)[a-z0-9-]{1,63}(?<!-)$', re.IGNORECASE)


def to_str(bstr, encoding='utf-8'):
    """Decode *bstr* with *encoding* if it is bytes; otherwise return it."""
    if isinstance(bstr, bytes):
        return bstr.decode(encoding)
    return bstr


def to_bytes(ustr, encoding='utf-8'):
    """Encode *ustr* with *encoding* if it is text; otherwise return it."""
    if isinstance(ustr, UnicodeType):
        return ustr.encode(encoding)
    return ustr


def to_int(string):
    """Return ``int(string)``, or None when the conversion is not possible."""
    try:
        return int(string)
    except (TypeError, ValueError):
        return None


def to_ip_address(ipstr):
    """Parse *ipstr* (text or bytes) into an ``ipaddress`` address object.

    For addresses starting with ``fe80::`` anything from the first ``%``
    onwards (the zone/scope suffix) is dropped before parsing.

    Raises:
        ValueError: if the value is not a valid IPv4/IPv6 address.
    """
    ip = to_str(ipstr)
    if ip.startswith('fe80::'):
        ip = ip.split('%')[0]
    return ipaddress.ip_address(ip)


def is_valid_ip_address(ipstr):
    """Return True if *ipstr* can be parsed by :func:`to_ip_address`."""
    try:
        to_ip_address(ipstr)
    except ValueError:
        return False
    return True


def is_valid_port(port):
    """Return True if *port* lies in the range 1-65535 inclusive."""
    return 0 < port < 65536


def is_valid_encoding(encoding):
    """Return True if *encoding* names a codec usable for encoding text."""
    try:
        u'test'.encode(encoding)
    except LookupError:
        return False
    return True


def is_ip_hostname(hostname):
    """Return True if *hostname* looks like an IP literal, not a DNS name.

    A value starting with ``[`` is treated as a bracketed IPv6 literal.
    Otherwise every character, including the first, must be a digit or a
    dot. An empty string is not an IP hostname.
    """
    if not hostname:
        return False
    if hostname.startswith('['):
        return True
    return all(ch == '.' or ch.isdigit() for ch in hostname)


def is_valid_hostname(hostname):
    """Return True if *hostname* is a syntactically valid DNS hostname.

    One trailing dot is tolerated, the name may be at most 253 characters
    long, the last label must not be all-numeric, and every label must
    match the ``allowed`` pattern. An empty string is invalid.
    """
    # endswith() rather than hostname[-1] so that '' does not raise.
    if hostname.endswith('.'):
        # strip exactly one dot from the right, if present
        hostname = hostname[:-1]
    if len(hostname) > 253:
        return False

    labels = hostname.split('.')

    # the TLD must be not all-numeric
    if numeric.match(labels[-1]):
        return False

    return all(allowed.match(label) for label in labels)


def is_same_primary_domain(domain1, domain2):
    """Return True if both names share the same trailing two-label domain.

    The names are compared character by character from the right. They
    match as soon as a common suffix containing two dots has been seen
    (e.g. ``.example.com``). If the shorter name is exhausted first, the
    names match when they are identical, or when at least one dot was
    matched and the longer name continues with a dot at that position
    (``example.com`` vs ``www.example.com``).
    """
    i = -1
    dots = 0
    l1 = len(domain1)
    l2 = len(domain2)
    m = min(l1, l2)

    while i >= -m:
        c1 = domain1[i]
        c2 = domain2[i]

        if c1 == c2:
            if c1 == '.':
                dots += 1
                if dots == 2:
                    return True
        else:
            return False

        i -= 1

    if l1 == l2:
        return True

    if dots == 0:
        return False

    # The shorter name is a suffix of the longer one; it only counts as
    # the same domain if the longer name has a label boundary right here.
    c = domain1[i] if l1 > m else domain2[i]
    return c == '.'


def parse_origin_from_url(url):
    """Build a ``scheme://netloc`` origin string from *url*.

    A URL that does not start with ``http://``, ``https://`` or ``//`` is
    treated as a bare network location. When no scheme is given, ``https``
    is assumed for port 443 and ``http`` otherwise. The default port of the
    scheme (80 for http, 443 for https) is removed from the result.

    Returns:
        The origin string, or None if *url* is blank.
    """
    url = url.strip()
    if not url:
        return None

    if not url.startswith(('http://', 'https://', '//')):
        url = '//' + url

    parsed = urlparse(url)
    port = parsed.port
    scheme = parsed.scheme

    if scheme == '':
        scheme = 'https' if port == 443 else 'http'

    netloc = parsed.netloc
    is_default_port = ((port == 443 and scheme == 'https') or
                       (port == 80 and scheme == 'http'))
    if is_default_port:
        # Strip only the trailing ":<port>"; a blanket str.replace() would
        # also mangle IPv6 literals or userinfo containing the same text.
        suffix = ':{}'.format(port)
        if netloc.endswith(suffix):
            netloc = netloc[:-len(suffix)]

    return '{}://{}'.format(scheme, netloc)