My solutions to regular expression or regex problems on HackerRank #
I have a few useful links to share with you before you start solving regex problems. Here is a fine
tutorial on how to use Python's re
module to work with regular expressions, and also an online
tool to test regex patterns. And if you want to learn regex from scratch, you can go through
this video.
Backreferences #
1. Backreferences To Failed Groups #
https://www.hackerrank.com/challenges/backreferences-to-failed-groups/problem
# Eight digits in four pairs, either unseparated or with "-" between every pair.
Regex_Pattern = (
    r"^\d{2}"        # first pair of digits
    r"(-?)"          # group 1: optional "-" (captures "" when absent)
    r"(\d{2}\1){2}"  # two more pairs, each followed by whatever group 1 captured
    r"\d{2}$"        # last pair, then end of input
)
2. Forward References #
https://www.hackerrank.com/challenges/forward-references/problem
# \2 is a forward reference: it points at the (tac) group, which appears later in the pattern.
# NOTE(review): Python's re module is expected to reject a reference to a group that is not
# yet defined, so this pattern is presumably meant for another regex engine -- confirm.
Regex_Pattern = '^(\\2tic|(tac))+$';
Without using forward reference: ^tac(tac(tic)?)*$
Grouping and Capturing #
1. Matching Word Boundaries #
https://www.hackerrank.com/challenges/matching-word-boundaries/problem
# A whole word (delimited by \b on both sides) that starts with a vowel and
# continues with ASCII letters only.
Regex_Pattern = r'\b[aeiouAEIOU][a-zA-Z]*\b'
import re

# Print "true" when Regex_Pattern occurs anywhere in the line read from stdin,
# otherwise "false".
print("true" if re.search(Regex_Pattern, input()) is not None else "false")
2. Alternative Matching #
https://www.hackerrank.com/challenges/alternative-matching/problem
# One of the listed titles, a literal dot, then one or more ASCII letters
# running to the end of the input.
Regex_Pattern = r'^(Mr|Mrs|Ms|Dr|Er)\.[A-Za-z]{1,}$'
import re

# Print "true" when Regex_Pattern occurs anywhere in the line read from stdin,
# otherwise "false".
print("true" if re.search(Regex_Pattern, input()) is not None else "false")
Character Classes #
1. Matching Character Ranges #
https://www.hackerrank.com/challenges/matching-range-of-characters/problem
# The first five characters must be: lowercase letter, digit 1-9,
# anything but a lowercase letter, anything but an uppercase letter,
# uppercase letter. Nothing is required after them (no trailing anchor).
Regex_Pattern = r'^[a-z][1-9][^a-z][^A-Z][A-Z]'
import re

# Print "true" when Regex_Pattern occurs anywhere in the line read from stdin,
# otherwise "false".
print("true" if re.search(Regex_Pattern, input()) is not None else "false")
Applications #
1. Detect HTML links #
https://www.hackerrank.com/challenges/detect-html-links/problem
import re

# Group 1 is the href value, group 2 the text found just before the closing </a>.
LINK_PATTERN = re.compile(
    r'[^<]*<a href="([^"]+)".*?>(?:[^<]<\w+>)*([^<]*?)(?:<\/\w+>)*<\/a>'
)


def extract_links(fragment):
    """Return ``(url, text)`` tuples for every anchor matched in *fragment*.

    Both values are stripped of surrounding whitespace. The list is empty
    when nothing matches.
    """
    return [
        (url.strip(), text.strip())
        for url, text in LINK_PATTERN.findall(fragment)
    ]


def main():
    """Read a line count, then print ``url,text`` for each link on each line."""
    for _ in range(int(input().strip())):
        for url, text in extract_links(input().strip()):
            print("{0},{1}".format(url, text))


if __name__ == "__main__":
    main()
2. IP Address Validation #
https://www.hackerrank.com/challenges/ip-address-validation/problem
import re

# Four dot-separated fields, each 0-255 (one to three digits).
IPV4_PATTERN = re.compile(
    r'^([0-9]|[01]?[0-9][0-9]|2[0-4][0-9]|25[0-5])'
    r'(\.([0-9]|[01]?[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$'
)
# Eight colon-separated groups of one to four hexadecimal digits.
IPV6_PATTERN = re.compile(r'^([0-9a-fA-F]{1,4})(:([0-9a-fA-F]{1,4})){7}$')


def classify_ip(address):
    """Return ``"IPv4"``, ``"IPv6"`` or ``"Neither"`` for *address*.

    The two patterns cannot both match the same text (one requires dots, the
    other colons), so checking them in sequence is sufficient.
    """
    if IPV4_PATTERN.search(address) is not None:
        return "IPv4"
    if IPV6_PATTERN.search(address) is not None:
        return "IPv6"
    return "Neither"


def main():
    """Read a line count, then print the classification of each line."""
    for _ in range(int(input().strip())):
        print(classify_ip(input().strip()))


if __name__ == "__main__":
    main()
3. Detect HTML Tags #
https://www.hackerrank.com/challenges/detect-html-tags/problem
import re

# Captures the tag name of any opening or closing tag.
TAG_PATTERN = re.compile(r'<\/?([a-z0-9]+).*?>')


def tag_summary(lines):
    """Return the distinct tag names found in *lines*, sorted and ';'-joined.

    Joining handles zero or one tag correctly, which an index-based
    "print all but the last, then the last" loop does not.
    """
    tags = set()
    for line in lines:
        tags.update(name.strip() for name in TAG_PATTERN.findall(line))
    return ";".join(sorted(tags))


def main():
    """Read a line count and that many lines, then print the tag summary."""
    lines = [input().strip() for _ in range(int(input().strip()))]
    print(tag_summary(lines))


if __name__ == "__main__":
    main()
4. Detect the Email Addresses #
https://www.hackerrank.com/challenges/detect-the-email-addresses/problem
import re

# Local part of letters/digits/underscore/dot, "@", then an alphanumeric label
# optionally followed by dot-separated alphabetic labels.
EMAIL_PATTERN = re.compile(r'([A-Za-z0-9_.]+@[A-Za-z0-9]+(?:\.[A-Za-z]+)*)')


def email_summary(lines):
    """Return the distinct addresses found in *lines*, sorted and ';'-joined.

    Joining handles zero or one address correctly, which an index-based
    "print all but the last, then the last" loop does not.
    """
    emails = set()
    for line in lines:
        emails.update(found.strip() for found in EMAIL_PATTERN.findall(line))
    return ";".join(sorted(emails))


def main():
    """Read a line count and that many lines, then print the address summary."""
    lines = [input().strip() for _ in range(int(input().strip()))]
    print(email_summary(lines))


if __name__ == "__main__":
    main()
5. Detect the Domain Name #
https://www.hackerrank.com/challenges/detect-the-domain-name/problem
import re

# Group 1 is the host name after the scheme and an optional "www."/"ww2." prefix.
DOMAIN_PATTERN = re.compile(
    r'https?:\/\/(?:ww[w2]\.)?(([A-Za-z0-9-]+\.)+([A-Za-z-]+))'
)


def domain_summary(lines):
    """Return the distinct domains found in *lines*, sorted and ';'-joined.

    Joining handles zero or one domain correctly, which an index-based
    "print all but the last, then the last" loop does not.
    """
    domains = set()
    for line in lines:
        # findall yields one tuple per match; element 0 is the full host name.
        domains.update(groups[0].strip() for groups in DOMAIN_PATTERN.findall(line))
    return ";".join(sorted(domains))


def main():
    """Read a line count and that many lines, then print the domain summary."""
    lines = [input().strip() for _ in range(int(input().strip()))]
    print(domain_summary(lines))


if __name__ == "__main__":
    main()
6. Find A Sub-Word #
https://www.hackerrank.com/challenges/find-substring/problem
import re


def count_subword(sentences, query):
    """Count occurrences of *query* strictly inside a longer word.

    An occurrence counts when the characters immediately before and after it
    are both word characters (ASCII letter, digit or underscore). Lookarounds
    are used so the neighbours are not consumed; consuming them would hide an
    occurrence that directly follows another one (e.g. "aisisa" holds two).
    """
    pattern = re.compile(
        r'(?<=[A-Za-z0-9_])' + re.escape(query) + r'(?=[A-Za-z0-9_])'
    )
    return sum(len(pattern.findall(sentence)) for sentence in sentences)


def main():
    """Read the sentences, then print the sub-word count for each query."""
    sentences = [input().strip() for _ in range(int(input().strip()))]
    for _ in range(int(input().strip())):
        print(count_subword(sentences, input().strip()))


if __name__ == "__main__":
    main()
7. Split the Phone Numbers #
https://www.hackerrank.com/challenges/split-number/problem
import re


def format_phone(number):
    """Split *number* on whitespace or '-' and label its first three parts."""
    parts = re.split(r'[\s-]', number)
    return (
        'CountryCode=' + parts[0]
        + ",LocalAreaCode=" + parts[1]
        + ",Number=" + parts[2]
    )


def main():
    """Read a line count, then print the labelled form of each number."""
    for _ in range(int(input().strip())):
        print(format_phone(input().strip()))


if __name__ == "__main__":
    main()
8. Find a Word #
https://www.hackerrank.com/challenges/find-a-word/problem
import re


def count_word(sentences, word):
    """Count whole-word occurrences of *word* across *sentences*.

    The word is wrapped in \\b anchors; it is escaped first so that it is
    always matched literally.
    """
    pattern = re.compile(r'\b' + re.escape(word) + r'\b')
    return sum(len(pattern.findall(sentence)) for sentence in sentences)


def main():
    """Read the sentences, then print the whole-word count for each query."""
    sentences = [input().strip() for _ in range(int(input().strip()))]
    for _ in range(int(input().strip())):
        print(count_word(sentences, input().strip()))


if __name__ == "__main__":
    main()
9. Detecting Valid Latitude and Longitude Pairs #
https://www.hackerrank.com/challenges/detecting-valid-latitude-and-longitude/problem
import re

# "(latitude, longitude)" with an optional sign on each number.
# Latitude alternatives: 90 (optionally ".0..."), 10-89 or a single digit,
# the latter two with an optional fractional part.
# Longitude alternatives: 180 (optionally ".0..."), 100-179, 10-99 or a single
# digit, the latter three with an optional fractional part.
PAIR_PATTERN = (
    r'\('
    r'[\+-]?(90(\.0+)?|[1-8]\d(\.[0-9]+)?|\d(\.[0-9]+)?)'
    r', '
    r'[\+-]?(180(\.0+)?|1[0-7]\d(\.[0-9]+)?|[1-9]\d(\.[0-9]+)?|\d(\.[0-9]+)?)'
    r'\)'
)


def validate(data):
    """Return a match object if *data* contains a valid pair, else ``None``."""
    return re.search(PAIR_PATTERN, data)


def main():
    """Read a line count, then print "Valid" or "Invalid" for each line."""
    for _ in range(int(input().strip())):
        print("Valid" if validate(input().strip()) else "Invalid")


if __name__ == "__main__":
    main()
10. Valid PAN format #
https://www.hackerrank.com/challenges/valid-pan-format/problem
import re

# Five uppercase letters, four digits, one uppercase letter.
PAN_PATTERN = re.compile(r'[A-Z]{5}[0-9]{4}[A-Z]')


def is_valid_pan(candidate):
    """Return True when *candidate* is exactly a PAN-shaped string.

    ``fullmatch`` is used so that extra characters around an otherwise valid
    sequence are rejected.
    """
    return PAN_PATTERN.fullmatch(candidate) is not None


def main():
    """Read a line count, then print "YES" or "NO" for each line."""
    for _ in range(int(input().strip())):
        # strip(), not split(): the pattern must see the text itself rather
        # than the string form of a list.
        print("YES" if is_valid_pan(input().strip()) else "NO")


if __name__ == "__main__":
    main()
11. Alien Username #
https://www.hackerrank.com/challenges/alien-username/problem
import re

# "_" or ".", then one or more digits, then optional letters, then an
# optional trailing "_".
USERNAME_PATTERN = re.compile(r'^[_.][0-9]+[A-Za-z]*[_]?$')


def is_valid_username(name):
    """Return True when *name* matches the username pattern."""
    return USERNAME_PATTERN.search(name) is not None


def main():
    """Read a line count, then print "VALID" or "INVALID" for each line."""
    for _ in range(int(input().strip())):
        print("VALID" if is_valid_username(input().strip()) else "INVALID")


if __name__ == "__main__":
    main()
12. The British and American Style of Spelling #
https://www.hackerrank.com/challenges/uk-and-us/problem
import re


def count_spellings(text, word):
    """Count whole-word occurrences of *word* in either "-ze" or "-se" form.

    The last two characters of *word* are dropped and replaced by the
    alternation, so the same stem is counted under both spellings. Word
    boundaries keep longer words that merely contain the stem from being
    counted.
    """
    stem = word[:-2]
    pattern = r'\b' + re.escape(stem) + r'(?:ze|se)\b'
    return len(re.findall(pattern, text))


def main():
    """Read the text lines, then print the count for each queried word."""
    text = " ".join([input().strip() for _ in range(int(input().strip()))])
    for _ in range(int(input().strip())):
        # Strip the query so stray whitespace does not shift the [:-2] cut.
        print(count_spellings(text, input().strip()))


if __name__ == "__main__":
    main()
13. UK and US: Part 2 #
https://www.hackerrank.com/challenges/uk-and-us-2/problem
import re


def count_our_or(text, word):
    """Count whole-word occurrences of *word* or its "our" -> "or" variant."""
    variant = re.sub("our", "or", word)
    pattern = r'\b' + f'({word}|{variant})' + r'\b'
    return len(re.findall(pattern, text))


def main():
    """Read the text lines, then print the count for each queried word."""
    text = " ".join([input().strip() for _ in range(int(input().strip()))])
    for _ in range(int(input().strip())):
        print(count_our_or(text, input().strip()))


if __name__ == "__main__":
    main()