1236. Web Crawler

class Solution:
    """Breadth-first web crawler restricted to the host of the start URL."""

    @staticmethod
    def _host_of(url: str) -> str:
        """Return the host (network location) of *url*, or "" if it has none.

        Links without a network location (relative paths, ``mailto:`` and
        similar) and links that cannot be parsed yield an empty string, so
        the caller can skip them instead of crashing.
        """
        # Function-scope import: the file has no import section of its own.
        from urllib.parse import urlsplit

        try:
            return urlsplit(url).netloc
        except ValueError:
            # urlsplit rejects some malformed hosts (e.g. an unbalanced "[").
            return ""

    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        """Return every URL reachable from *startUrl* on the same host.

        Args:
            startUrl: URL the crawl starts from; always part of the result.
            htmlParser: Object whose ``getUrls(url)`` returns the links
                found on the page at ``url``.

        Returns:
            The discovered URLs in breadth-first discovery order, beginning
            with ``startUrl``. Only the host is compared, so the scheme of
            a link does not matter. Links without a host are ignored.
        """
        domain = self._host_of(startUrl)
        visited = {startUrl}
        ans = [startUrl]
        queue = deque([startUrl])

        while queue:
            url = queue.popleft()
            for new_url in htmlParser.getUrls(url):
                if new_url in visited:
                    continue
                host = self._host_of(new_url)
                # An empty host means "not an absolute URL"; never treat it
                # as matching, even if the start URL itself had no host.
                if not host or host != domain:
                    continue
                visited.add(new_url)
                ans.append(new_url)
                queue.append(new_url)
        return ans
  • Simple BFS; the only extra work is extracting the hostname from each URL to check that it matches the start URL's hostname.