patternpythonModerate
Trie implementation in Python
Viewed 0 times
implementationpythontrie
Problem
I have created a trie in Python and am looking for feedback. Specifically, I am looking for feedback on:
I have implemented the following logic:
. Get all the words in the trie
My purpose for coding the trie is to improve my Python and algorithm knowledge.
```
from collections import deque
class Node:
def __init__(self, character, parent):
self.character = character
if self.character is not None:
self.character = self.character.lower()
self.parent = parent
self.children = dict()
self.terminus = False
def add(self, child_node):
self.children[child_node.character] = child_node
class Trie:
def __init__(self):
self._root = Node(None, None)
def insert(self, word):
if word:
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = True
def __contains__(self, item):
current_node = self._root
contained = True
for symbol in self._normalize_word(item):
if symbol in current_node.children:
current_node = current_node.children[symbol]
else:
contained = False
break
return contained and current_node.terminus
def _normalize_word(self, word):
return word.strip().lower()
def _get_all_words(self, prefix, node, word_list):
if node.character:
prefix.append(
- If my code is 'pythonic'
- If my logic is proper when inserting and retrieving from the trie
I have implemented the following logic:
- Insert a word into the trie
- Check if a word exists in the trie
- Given a prefix, get all possible words
. Get all the words in the trie
My purpose for coding the trie is to improve my Python and algorithm knowledge.
```
from collections import deque
class Node:
def __init__(self, character, parent):
self.character = character
if self.character is not None:
self.character = self.character.lower()
self.parent = parent
self.children = dict()
self.terminus = False
def add(self, child_node):
self.children[child_node.character] = child_node
class Trie:
def __init__(self):
self._root = Node(None, None)
def insert(self, word):
if word:
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = True
def __contains__(self, item):
current_node = self._root
contained = True
for symbol in self._normalize_word(item):
if symbol in current_node.children:
current_node = current_node.children[symbol]
else:
contained = False
break
return contained and current_node.terminus
def _normalize_word(self, word):
return word.strip().lower()
def _get_all_words(self, prefix, node, word_list):
if node.character:
prefix.append(
Solution
Starting in the
In your
You can save a level of indentation by returning on false, essentially short-circuiting your method:
You also make a call to
Seeing your logic in your method made me think you could also have used a recursive method, something along the lines of:
One other change I made which depending on the size of the tree will improve performance is the use of a logical
In the
If
I saw this as another place where you could have done this recursively as well:
In the
```
def get_possible_words(self, word):
word_node = self.__contains(word, self._root)
re
Node class, there was a parent attribute that never gets referenced. Also I find if you are just assigning a variable based off one condition doing it in one line makes the code cleaner and easier to read.class Node:
def __init__(self, character):
self.character = character if character is None else character.lower()
self.children = dict()
self.terminus = False
def add(self, child_node):
self.children[child_node.character] = child_nodeIn your
Trie class specifically with the .insert and __contains__ methods:def insert(self, word):
if word: # checking for truth to continue
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = TrueYou can save a level of indentation by returning on false, essentially short-circuiting your method:
def insert(self, word):
if not word:
return
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = TrueYou also make a call to
enumerate but never use it:def insert(self, word):
if not word:
return
current_node = self._root
for character in self._normalize_word(word):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = TrueSeeing your logic in your method made me think you could also have used a recursive method, something along the lines of:
def insert(self, word):
if not word:
return
self.__insert(self._normalize_word(word), self._root).terminus = True
def __insert(self, word, node):
if not word:
return node
elif {word[0]} & node.children.keys():
return self.__insert(word[1:], node.children[word[0]])
else:
new_node = Node(word[0])
node.add(new_node)
return self.__insert(word[1:], new_node)One other change I made which depending on the size of the tree will improve performance is the use of a logical
& on the children.keys(). PEP 3106 brought about changes that allowed for set operations on dict-keys. This means you could just as well check if a dict has a value as a key by {value} & dict.keys() rather than iterating over every key.In the
__contains__ method where you set a truth flag to false then break and then test with a logical &:def __contains__(self, item):
current_node = self._root
contained = True
for symbol in self._normalize_word(item):
if symbol in current_node.children:
current_node = current_node.children[symbol]
else:
contained = False
break
return contained and current_node.terminusIf
contained is set to False theres no need to check at the bottom of the method. Just get rid of contained and return False if your first condition isn't met. Similar to the last point in .insert you can save a couple of lines if you short-circuit, basically no need for else if you are returning on it:def __contains__(self, item):
current_node = self._root
for symbol in self._normalize_word(item):
if symbol not in current_node.children:
return False
current_node = current_node.children[symbol]
return current_node.terminusI saw this as another place where you could have done this recursively as well:
def __contains(self, word, node):
if not word:
return node
return self.__contains(word[1:], node.children[word[0]]) if \
{word[0]} & node.children.keys() else None
def __contains__(self, item):
word = self._normalize_word(item)
return self.__contains(word, self._root) is not NoneIn the
__contains method you can return the Node of the last letter or None. This is useful where you can get a boolean value from your dunder-contains method to check for membership but also you can use the __contains method with the get_possible_words method later. This is similar to how I adjusted the __insert method to return the final Node after inserting to set .terminus.```
def get_possible_words(self, word):
word_node = self.__contains(word, self._root)
re
Code Snippets
class Node:
def __init__(self, character):
self.character = character if character is None else character.lower()
self.children = dict()
self.terminus = False
def add(self, child_node):
self.children[child_node.character] = child_nodedef insert(self, word):
if word: # checking for truth to continue
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = Truedef insert(self, word):
if not word:
return
current_node = self._root
for i, character in enumerate(self._normalize_word(word)):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = Truedef insert(self, word):
if not word:
return
current_node = self._root
for character in self._normalize_word(word):
if character in current_node.children:
current_node = current_node.children[character]
else:
child_node = Node(character, current_node)
current_node.add(child_node)
current_node = child_node
current_node.terminus = Truedef insert(self, word):
if not word:
return
self.__insert(self._normalize_word(word), self._root).terminus = True
def __insert(self, word, node):
if not word:
return node
elif {word[0]} & node.children.keys():
return self.__insert(word[1:], node.children[word[0]])
else:
new_node = Node(word[0])
node.add(new_node)
return self.__insert(word[1:], new_node)Context
StackExchange Code Review Q#147752, answer score: 10
Revisions (0)
No revisions yet.