HiveBrain v1.2.0
Get Started
← Back to all entries
patternpythonModerate

Trie implementation in Python

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
implementationpythontrie

Problem

I have created a trie in Python and am looking for feedback. Specifically, I am looking for feedback on:

  • If my code is 'pythonic'



  • If my logic is proper when inserting and retrieving from the trie



I have implemented the following logic:

  • Insert a word into the trie



  • Check if a word exists in the trie



  • Given a prefix, get all possible words


. Get all the words in the trie

My purpose for coding the trie is to improve my Python and algorithm knowledge.

```
from collections import deque

class Node:
def __init__(self, character, parent):
self.character = character
if self.character is not None:
self.character = self.character.lower()
self.parent = parent
self.children = dict()
self.terminus = False

def add(self, child_node):
self.children[child_node.character] = child_node

class Trie:

def __init__(self):
self._root = Node(None, None)

def insert(self, word):
if word:
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = True

def __contains__(self, item):
current_node = self._root
contained = True
for symbol in self._normalize_word(item):
if symbol in current_node.children:
current_node = current_node.children[symbol]
else:
contained = False
break
return contained and current_node.terminus

def _normalize_word(self, word):
return word.strip().lower()

def _get_all_words(self, prefix, node, word_list):
if node.character:
prefix.append(

Solution

Starting in the Node class, there was a parent attribute that never gets referenced. Also I find if you are just assigning a variable based off one condition doing it in one line makes the code cleaner and easier to read.

class Node:

    def __init__(self, character):
        self.character = character if character is None else character.lower()
        self.children = dict()
        self.terminus = False

    def add(self, child_node):
        self.children[child_node.character] = child_node


In your Trie class specifically with the .insert and __contains__ methods:

def insert(self, word):
    if word: # checking for truth to continue
        current_node = self._root
        for i, character in enumerate(self._normalize_word(word)):
            if character in current_node.children:
                current_node = current_node.children[character]
            else:
                child_node = Node(character, current_node)
                current_node.add(child_node)
                current_node = child_node
        current_node.terminus = True


You can save a level of indentation by returning on false, essentially short-circuiting your method:

def insert(self, word):
    if not word:
        return
    current_node = self._root
    for i, character in enumerate(self._normalize_word(word)):
        if character in current_node.children:
            current_node = current_node.children[character]
        else:
            child_node = Node(character, current_node)
            current_node.add(child_node)
            current_node = child_node
    current_node.terminus = True


You also make a call to enumerate but never use it:

def insert(self, word):
    if not word:
        return
    current_node = self._root
    for character in self._normalize_word(word):
        if character in current_node.children:
            current_node = current_node.children[character]
        else:
            child_node = Node(character, current_node)
            current_node.add(child_node)
            current_node = child_node
    current_node.terminus = True


Seeing your logic in your method made me think you could also have used a recursive method, something along the lines of:

def insert(self, word):
    if not word: 
        return
    self.__insert(self._normalize_word(word), self._root).terminus = True

def __insert(self, word, node):
    if not word:
        return node
    elif {word[0]} & node.children.keys():
        return self.__insert(word[1:], node.children[word[0]])
    else:
        new_node = Node(word[0])
        node.add(new_node)
        return self.__insert(word[1:], new_node)


One other change I made which depending on the size of the tree will improve performance is the use of a logical & on the children.keys(). PEP 3106 brought about changes that allowed for set operations on dict-keys. This means you could just as well check if a dict has a value as a key by {value} & dict.keys() rather than iterating over every key.

In the __contains__ method where you set a truth flag to false then break and then test with a logical &:

def __contains__(self, item):
    current_node = self._root
    contained = True
    for symbol in self._normalize_word(item):
        if symbol in current_node.children:
            current_node = current_node.children[symbol]
        else:
            contained = False
            break
    return contained and current_node.terminus


If contained is set to False theres no need to check at the bottom of the method. Just get rid of contained and return False if your first condition isn't met. Similar to the last point in .insert you can save a couple of lines if you short-circuit, basically no need for else if you are returning on it:

def __contains__(self, item):
    current_node = self._root
    for symbol in self._normalize_word(item):
        if symbol not in current_node.children:
            return False
        current_node = current_node.children[symbol]
    return current_node.terminus


I saw this as another place where you could have done this recursively as well:

def __contains(self, word, node):
    if not word:
        return node
    return self.__contains(word[1:], node.children[word[0]]) if \
           {word[0]} & node.children.keys() else None

def __contains__(self, item):
    word = self._normalize_word(item)
    return self.__contains(word, self._root) is not None


In the __contains method you can return the Node of the last letter or None. This is useful where you can get a boolean value from your dunder-contains method to check for membership but also you can use the __contains method with the get_possible_words method later. This is similar to how I adjusted the __insert method to return the final Node after inserting to set .terminus.

```
def get_possible_words(self, word):
word_node = self.__contains(word, self._root)
re

Code Snippets

class Node:

    def __init__(self, character):
        self.character = character if character is None else character.lower()
        self.children = dict()
        self.terminus = False

    def add(self, child_node):
        self.children[child_node.character] = child_node
def insert(self, word):
    if word: # checking for truth to continue
        current_node = self._root
        for i, character in enumerate(self._normalize_word(word)):
            if character in current_node.children:
                current_node = current_node.children[character]
            else:
                child_node = Node(character, current_node)
                current_node.add(child_node)
                current_node = child_node
        current_node.terminus = True
def insert(self, word):
    if not word:
        return
    current_node = self._root
    for i, character in enumerate(self._normalize_word(word)):
        if character in current_node.children:
            current_node = current_node.children[character]
        else:
            child_node = Node(character, current_node)
            current_node.add(child_node)
            current_node = child_node
    current_node.terminus = True
def insert(self, word):
    if not word:
        return
    current_node = self._root
    for character in self._normalize_word(word):
        if character in current_node.children:
            current_node = current_node.children[character]
        else:
            child_node = Node(character, current_node)
            current_node.add(child_node)
            current_node = child_node
    current_node.terminus = True
def insert(self, word):
    if not word: 
        return
    self.__insert(self._normalize_word(word), self._root).terminus = True

def __insert(self, word, node):
    if not word:
        return node
    elif {word[0]} & node.children.keys():
        return self.__insert(word[1:], node.children[word[0]])
    else:
        new_node = Node(word[0])
        node.add(new_node)
        return self.__insert(word[1:], new_node)

Context

StackExchange Code Review Q#147752, answer score: 10

Revisions (0)

No revisions yet.