Find last unique URL from long list of URLs in single traversal

Given a very long list of URLs, find out last unique URL. Only one traversal of all URLs is allowed. Examples:
Input: https://www.zambiatek.com https://www.zambiatek.com/quiz-corner-gq/ http://qa.zambiatek.com https://practice.zambiatek.com https://ide.zambiatek.com http://write.zambiatek.com https://www.zambiatek.com/quiz-corner-gq/ https://practice.zambiatek.com https://ide.zambiatek.com https://www.zambiatek.com/quiz-corner-gq/ http://qa.zambiatek.com https://practice.zambiatek.com Output: http://write.zambiatek.com
We can solve this problem in one traversal by using Trie with a Doubly Linked List (We can insert and delete in O(1) time). The idea is to insert all URLs into the Trie one by one and check if it is duplicate or not. To know if we have previously encountered the URL, we need to mark the last node of every URL as leaf node. If we encounter a URL for the first time, we insert it into the doubly Linked list and maintain a pointer to that node in linked list in leaf node of the trie. If we encounter a URL that is already in the trie and has pointer to the url in the linked list, we delete the node from the linked list and set its pointer in the trie to null. After all URLs are processed, linked list will only contain the URLs that are distinct and the node at the beginning of the linked list will be last unique URL.Â
CPP
// C++ program to print distinct URLs using Trie// and Doubly Linked List#include <bits/stdc++.h>using namespace std;Â
// Alphabet size (# of symbols)const int ALPHABET_SIZE = 256;Â
// A linked list nodestruct DLLNode {Â Â Â Â string data;Â Â Â Â DLLNode *next, *prev;};Â
// trie nodestruct TrieNode {Â Â Â Â TrieNode* children[ALPHABET_SIZE];Â
    // isLeaf is true if the node represents    // end of a word    bool isLeaf;Â
    DLLNode* LLptr;};Â
/* Given a reference (pointer to pointer) to thehead of a list and an int, inserts a new nodeon the front of the list. */void push(DLLNode*& head_ref, string new_data){Â Â Â Â DLLNode* new_node = new DLLNode;Â
    // put in the data    new_node->data = new_data;Â
    // Make next of new node as head and previous    // as NULL    new_node->next = (head_ref);    new_node->prev = NULL;Â
    // change prev of head node to new node    if (head_ref != NULL)        head_ref->prev = new_node;Â
    // move the head to point to the new node    head_ref = new_node;}Â
/* Function to delete a node in a Doubly Linked List.head_ref --> pointer to head node pointer.del --> pointer to node to be deleted. */void deleteNode(DLLNode*& head_ref, DLLNode* del){    // base case    if (head_ref == NULL || del == NULL)        return;Â
    // If node to be deleted is head node    if (head_ref == del)        head_ref = del->next;Â
    // Change next only if node to be deleted is    // NOT the last node    if (del->next != NULL)        del->next->prev = del->prev;Â
    // Change prev only if node to be deleted is    // NOT the first node    if (del->prev != NULL)        del->prev->next = del->next;Â
    // Finally, free the memory occupied by del    delete (del);    return;}Â
// Returns new trie node (initialized to NULLs)TrieNode* getNewTrieNode(void){Â Â Â Â TrieNode* pNode = new TrieNode;Â
    if (pNode) {        pNode->isLeaf = false;Â
        for (int i = 0; i < ALPHABET_SIZE; i++)            pNode->children[i] = NULL;Â
        pNode->LLptr = NULL;    }Â
    return pNode;}Â
// If not present, inserts key into trie// If the key is prefix of trie node, just marks leaf nodevoid insert(TrieNode* root, string key, DLLNode*& head){Â Â Â Â int index;Â Â Â Â TrieNode* pCrawl = root;Â
    for (int level = 0; level < key.length(); level++) {        index = int(key[level]);        if (!pCrawl->children[index])            pCrawl->children[index] = getNewTrieNode();Â
        pCrawl = pCrawl->children[index];    }Â
    if (pCrawl->isLeaf) {        // cout << "Duplicate Found " << key << endl;        // delete from linked list        if (pCrawl->LLptr)            deleteNode(head, pCrawl->LLptr);        pCrawl->LLptr = NULL;    }    else {        // mark last node as leaf        pCrawl->isLeaf = true;Â
        // insert to linked list        push(head, key);        pCrawl->LLptr = head;    }}Â
// Driver functionint main(){Â Â Â Â string urls[]Â
    TrieNode* root = getNewTrieNode();Â
    // Start with the empty list    DLLNode* head = NULL;    int n = sizeof(urls) / sizeof(urls[0]);Â
    // Construct Trie from given URLs    for (int i = 0; i < n; i++)        insert(root, urls[i], head);Â
    // head of linked list will point to last    // distinct URL    cout << head->data << endl;Â
    return 0;} |
Python3
# Python program to print distinct URLs using Trie# and Doubly Linked ListÂ
# Alphabet size (# of symbols)ALPHABET_SIZE = 256Â
# A linked list nodeÂ
Â
class DLLNode:    def __init__(self, data):        self.data = data        self.next = None        self.prev = NoneÂ
# trie nodeÂ
Â
class TrieNode:    def __init__(self):        self.children = [None] * ALPHABET_SIZE        # isLeaf is true if the node represents        # end of a word        self.isLeaf = False        self.LLptr = NoneÂ
# Given a reference (pointer to pointer) to the# head of a list and an int, inserts a new node# on the front of the list.Â
Â
def push(head_ref, new_data):    # put in the data    new_node = DLLNode(new_data)Â
    # Make next of new node as head and previous    # as NULL    new_node.next = head_refÂ
    # change prev of head node to new node    if head_ref is not None:        head_ref.prev = new_nodeÂ
    # move the head to point to the new node    head_ref = new_node    return head_refÂ
# Function to delete a node in a Doubly Linked List.# head_ref --> pointer to head node pointer.# del --> pointer to node to be deleted.Â
Â
def deleteNode(head_ref, del_node):    # base case    if head_ref is None or del_node is None:        returnÂ
    # If node to be deleted is head node    if head_ref == del_node:        head_ref = del_node.nextÂ
    # Change next only if node to be deleted is    # NOT the last node    if del_node.next is not None:        del_node.next.prev = del_node.prevÂ
    # Change prev only if node to be deleted is    # NOT the first node    if del_node.prev is not None:        del_node.prev.next = del_node.nextÂ
    # Finally, free the memory occupied by del    del del_node    return head_refÂ
# Returns new trie node (initialized to NULLs)Â
Â
def getNewTrieNode():Â Â Â Â pNode = TrieNode()Â
    pNode.isLeaf = False    pNode.LLptr = NoneÂ
    return pNodeÂ
# If not present, inserts key into trie# If the key is prefix of trie node, just marks leaf nodeÂ
Â
def insert(root, key, head):Â Â Â Â pCrawl = rootÂ
    for level in range(len(key)):        index = ord(key[level])        if pCrawl.children[index] is None:            pCrawl.children[index] = getNewTrieNode()Â
        pCrawl = pCrawl.children[index]Â
    if pCrawl.isLeaf:        # cout << "Duplicate Found " << key << endl;        # delete from linked list        if pCrawl.LLptr is not None:            head = deleteNode(head, pCrawl.LLptr)        pCrawl.LLptr = None    else:        # mark last node as leaf        pCrawl.isLeaf = TrueÂ
        # insert to linked list        head = push(head, key)        pCrawl.LLptr = headÂ
    return headÂ
Â
# Driver functionurls = [Â Â Â Â "https://practice.zambiatek.com"]Â
root = getNewTrieNode()Â
# Start with the empty listhead = NoneÂ
# Construct Trie from given URLsfor url in urls:Â Â Â Â head = insert(root, url, head)Â
# head of linked list will point to last# distinct URLprint(head.data)Â
# This code is contributed by Aman Kumar |
Javascript
const ALPHABET_SIZE = 256; // Alphabet size (# of symbols)Â
//DLLNode class is a linked list node, which stores data and the next and previous nodeclass DLLNode {    constructor(data) { //constructor for DLLNode, which takes data as an argument        this.data = data;        this.next = null;        this.prev = null;    }}Â
//TrieNode class is a trie node, which stores an array of children for each node, a boolean for isLeaf, and a pointer to a doubly linked list nodeclass TrieNode {    constructor() {        this.children = new Array(ALPHABET_SIZE).fill(null); //array of size ALPHABET_SIZE filled with null        this.isLeaf = false;        this.LLptr = null; //pointer to doubly linked list node    }}Â
//push function which takes a reference to the head of a list and an int, inserts a new node on the front of the listfunction push(head_ref, new_data) {Â Â Â Â const new_node = new DLLNode(new_data); //creates a new node of type DLLNode with data as the new_data argumentÂ
    new_node.next = head_ref; //sets the next node of the new node to the head node passed inÂ
    //change the prev of the head node to the new node if the head node is not null    if (head_ref !== null) {        head_ref.prev = new_node;    }Â
    head_ref = new_node; //move the head of the list to point to the new node    return head_ref;}Â
//deleteNode function which takes a reference to the head of a list and a node to be deleted, deletes the node from the listfunction deleteNode(head_ref, del_node) {    //base case if head or node to be deleted is null    if (head_ref === null || del_node === null) {        return;    }Â
    //if node to be deleted is the head node    if (head_ref === del_node) {        head_ref = del_node.next;    }Â
    //change next if node to be deleted is not the last node    if (del_node.next !== null) {        del_node.next.prev = del_node.prev;    }Â
    //change prev if node to be deleted is not the first node    if (del_node.prev !== null) {        del_node.prev.next = del_node.next;    }Â
    return head_ref;}Â
//getNewTrieNode function which returns a new trie node initialized to nullsfunction getNewTrieNode() {Â Â Â Â const pNode = new TrieNode();Â
    pNode.isLeaf = false;    pNode.LLptr = null;Â
    return pNode;}Â
//insert function which takes a root node, a key, and a head node, and inserts the key into the triefunction insert(root, key, head) {Â Â Â Â let pCrawl = root; //creates a pointer to the root nodeÂ
    //loops through the length of the key    for (let level = 0; level < key.length; level++) {        const index = key.charCodeAt(level); //gets the ascii value of the character at the current level        //if the node at the current level is null, create a new trie node        if (pCrawl.children[index] === null) {            pCrawl.children[index] = getNewTrieNode();        }Â
        pCrawl = pCrawl.children[index]; //moves the pointer to the next node in the trie    }Â
    //if the node is already a leaf node    if (pCrawl.isLeaf) {        //if the LLptr is not null, delete the node from the linked list        if (pCrawl.LLptr !== null) {            head = deleteNode(head, pCrawl.LLptr);        }        pCrawl.LLptr = null;    } else {        //mark last node as leaf        pCrawl.isLeaf = true;Â
        //insert to linked list        head = push(head, key);        pCrawl.LLptr = head;    }Â
    return head;}Â
//list of URLsconst urls = [Â Â Â Â "https://practice.zambiatek.com"];Â
const root = getNewTrieNode(); //create a root nodelet head = null; //head of linked listÂ
//loop through the URLs and insert them into the triefor (const url of urls) {Â Â Â Â head = insert(root, url, head);}Â
//head of linked list will point to the last distinct URLconsole.log(head.data); |
C#
// C# program to print distinct URLs using Trie// and Doubly Linked ListÂ
using System;using System.Collections.Generic;Â
// trie nodeclass TrieNode {    // Alphabet size (# of symbols)    const int ALPHABET_SIZE = 256;    public TrieNode[] children        = new TrieNode[ALPHABET_SIZE];Â
    // isLeaf is true if the node represents    // end of a word    public bool isLeaf;    public DLLNode LLptr;Â
    public TrieNode()    {        isLeaf = false;        LLptr = null;        for (int i = 0; i < ALPHABET_SIZE; i++)            children[i] = null;    }}// A linked list nodeclass DLLNode {    public string data;    public DLLNode next, prev;}Â
class Program {Â
    // If not present, inserts key into trie    // If the key is prefix of trie node, just marks leaf    // node    static void Insert(TrieNode root, string key,                       ref DLLNode head)    {        int index;        TrieNode pCrawl = root;Â
        for (int level = 0; level < key.Length; level++) {            index = (int)key[level];            if (pCrawl.children[index] == null)                pCrawl.children[index] = new TrieNode();Â
            pCrawl = pCrawl.children[index];        }Â
        if (pCrawl.isLeaf) { // delete from linked listÂ
            if (pCrawl.LLptr != null)                DeleteNode(ref head, pCrawl.LLptr);            pCrawl.LLptr = null;        }        elseÂ
        {            // mark last node as leaf            pCrawl.isLeaf = true;            // insert to linked list            Push(ref head, key);            pCrawl.LLptr = head;        }    }    /* Given a reference (pointer to pointer) to the    head of a list and an int, inserts a new node    on the front of the list. */    static void Push(ref DLLNode head_ref, string new_data)    {        DLLNode new_node = new DLLNode();        // put in the data        new_node.data = new_data;        // Make next of new node as head and previous        // as NULL        new_node.next = head_ref;        new_node.prev = null;        // change prev of head node to new node        if (head_ref != null)            head_ref.prev = new_node;        // move the head to point to the new node        head_ref = new_node;    }    /* Function to delete a node in a Doubly Linked List.    head_ref --> pointer to head node pointer.    del --> pointer to node to be deleted. */    static void DeleteNode(ref DLLNode head_ref,                           DLLNode del)    {        // base case        if (head_ref == null || del == null)            return;        // If node to be deleted is head node        if (head_ref == del)            head_ref = del.next;        // Change next only if node to be deleted is        // NOT the last node        if (del.next != null)            del.next.prev = del.prev;Â
        // Change prev only if node to be deleted is        // NOT the first node        if (del.prev != null)            del.prev.next = del.next;        // Finally, free the memory occupied by del        del = null;    }    // Driver function    static void Main()    {        string[] urlsÂ
        TrieNode root = new TrieNode();        DLLNode head = null;        int n = urls.Length;Â
        for (int i = 0; i < n; i++)            Insert(root, urls[i], ref head);Â
        Console.WriteLine(head.data);    }} |
Java
import java.util.*;Â
class Trie {Â Â Â Â private static final int ALPHABET_SIZEÂ Â Â Â Â Â Â Â = 256; // Alphabet size (# of symbols)Â
    // DLLNode class is a linked list node, which stores    // data and the next and previous node    private static class DLLNode {        String data; // data stored in the node        DLLNode prev; // pointer to the previous node        DLLNode next; // pointer to the next nodeÂ
        DLLNode(String data)        { // constructor for DLLNode, which takes data as an          // argument            this.data = data;            this.prev = null;            this.next = null;        }    }Â
    // TrieNode class is a trie node, which stores an array    // of children for each node, a boolean for isLeaf, and    // a pointer to a doubly linked list node    private static class TrieNode {        TrieNode[] children; // array of size ALPHABET_SIZE                             // filled with null        boolean isLeaf; // boolean to mark if the node is a                        // leaf node        DLLNode LLptr; // pointer to doubly linked list nodeÂ
        TrieNode()        {            children = new TrieNode[ALPHABET_SIZE];            isLeaf = false;            LLptr = null;        }    }Â
    // push function which takes a reference to the head of    // a list and a string, inserts a new node at the front    // of the list    private static DLLNode push(DLLNode headRef,                                String newData)    {        DLLNode newNode = new DLLNode(            newData); // creates a new node of type DLLNode                      // with data as the new_data argumentÂ
        newNode.next            = headRef; // sets the next node of the new node                       // to the head node passed inÂ
        // change the prev of the head node to the new node        // if the head node is not null        if (headRef != null) {            headRef.prev = newNode;        }Â
        headRef = newNode; // move the head of the list to                           // point to the new node        return headRef;    }Â
    // deleteNode function which takes a reference to the    // head of a list and a node to be deleted, deletes the    // node from the list    private static DLLNode deleteNode(DLLNode headRef,                                      DLLNode delNode)    {        // base case if head or node to be deleted is null        if (headRef == null || delNode == null) {            return headRef;        }Â
        // if node to be deleted is the head node        if (headRef == delNode) {            headRef = delNode.next;        }Â
        // change next if node to be deleted is not the last        // node        if (delNode.next != null) {            delNode.next.prev = delNode.prev;        }Â
        // change prev if node to be deleted is not the        // first node        if (delNode.prev != null) {            delNode.prev.next = delNode.next;        }Â
        return headRef;    }Â
    // getNewTrieNode function which returns a new trie node    // initialized to nulls    private static TrieNode getNewTrieNode()    {        TrieNode pNode = new TrieNode();Â
        pNode.isLeaf = false;        pNode.LLptr = null;Â
        return pNode;    }Â
    // insert function which takes a root node, a key, and a    // head node, and inserts the key into the trie    static DLLNode insert(TrieNode root, String key,                          DLLNode head)    {        TrieNode pCrawl            = root; // create pointer to root nodeÂ
        // loop through the levels        for (int level = 0; level < key.length(); level++) {            int index = (int)key.charAt(level);Â
            if (pCrawl.children[index] == null) {                pCrawl.children[index] = getNewTrieNode();            }Â
            // move pointer to next node            pCrawl = pCrawl.children[index];        }Â
        // if the node is already a leaf node        if (pCrawl.isLeaf) {            if (pCrawl.LLptr != null) {Â
                // if the LLptr is not null, delete the node                // from the linked list                head = deleteNode(head, pCrawl.LLptr);            }            pCrawl.LLptr = null;        }        else {Â
            // mark last node as leaf            pCrawl.isLeaf = true;            head = push(head, key);            pCrawl.LLptr = head;        }Â
        return head;    }Â
    // Driver code    public static void main(String[] args)    {        String[] urls = {            "https://practice.zambiatek.com"        };Â
        TrieNode root = getNewTrieNode();        DLLNode head = null;Â
        // Function calls        for (String url : urls) {            head = insert(root, url, head);        }Â
        System.out.println(head.data);    }} |
http://write.zambiatek.com
Time Complexity: O(n*m) where m is the maximum length of a URL, and n is the number of URLs.
Auxiliary Space: O(ALPHABET_SIZE * m * n)
This article is contributed by Aditya Goel. If you like zambiatek and would like to contribute, you can also write an article using write.zambiatek.com or mail your article to review-team@zambiatek.com. See your article appearing on the zambiatek main page and help other Geeks. Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 zambiatek!



