HiveBrain v1.2.0
Get Started
← Back to all entries
patternpythonMinor

Need an algorithm to optimize grouping python dictionaries in hierarchical form

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
groupingneedalgorithmformpythonoptimizedictionarieshierarchical

Problem

Previously I asked a question on how to group dictionaries in a list in a hierarchical structure.

Initially I wanted to group a list of dictionaries that looks like the following, using multiple keys:

[{'dept':1, 'age':10, 'name':'Sam'},
{'dept':1, 'age':12, 'name':'John'},
.
.
.
{'dept':2,'age':20, 'name':'Mary'},
{'dept':2,'age':11, 'name':'Mark'},
{'dept':2,'age':11, 'name':'Tom'}]


And the output would be:

[
    {
        2: {
            20: [
                {
                    'age': 20,
                    'dept': 2,
                    'name': 'Mary'
                }
            ]
        },
        {
            11: [
                {
                    'age': 11,
                    'dept': 2,
                    'name': 'Mark'
                },
                {
                    'age': 11,
                    'dept': 2,
                    'name': 'Tom'
                }
            ]
        }
    },
    {
        1: {
            10: [
                {
                    'age': 10,
                    'dept': 1,
                    'name': 'Sam'
                }
            ]
        },
        {
            12: [
                {
                    'age': 12,
                    'dept': 1,
                    'name': 'John'
                }
            ]
        }
    }
]


Using this code:

```
import itertools, operator

l = [{'dept':1, 'age':10, 'name':'Sam'},
{'dept':1, 'age':12, 'name':'John'},
{'dept':2,'age':20, 'name':'Mary'},
{'dept':2,'age':11, 'name':'Mark'},
{'dept':2,'age':11, 'name':'Tom'}]

groups = ['dept', 'age', 'name']

groups.reverse()
def hierachical_data(data, groups):
g = groups[-1]
g_list = []
for key, items in itertools.groupby(data, operator.itemgetter(g)):
g_list.append({key:list(items)})
groups = groups[0:-1]
if(len(groups) != 0):
for e in g_list:
for k,v in e.items():
e[k] = hierachical_data(v,

Solution

Try doing a recursive sequence that iterates through each group, by passing a group index to each call, and find and build the tree as it receives inputs:

def generate_hierarchical_hash(entries, groups):
  """Returns hierarchical hash of entries, grouped at each level by the groups"""
  hierarchy = {}
  for to_insert in entries:
    insert_into_hierarchy(to_insert, hierarchy, groups, 0)
  return hierarchy

def insert_into_hierarchy(to_insert, hierarchy, groupings, current_grouping_level):
  if current_grouping_level == len(groupings):
    # if at final level, do not do anything - protection case
    pass
  else:
    grouping_at_level = groupings[current_grouping_level]
    grouping_key = to_insert[grouping_at_level]
    is_final_group = current_grouping_level == len(groupings) - 1
    if is_final_group:
      # if at last grouping, nodes will contain lists, so create or retrieve list
      # and append value to it
      if grouping_key in hierarchy:
        list_for_group = hierarchy[grouping_key]
      else:
        # create new list and insert it into hierarchy
        list_for_group = list()
        hierarchy[grouping_key] = list_for_group

      # insert to insert into end of list
      list_for_group.append(to_insert)
    else:
      # otherwise, find or create hash for this grouping,
      if grouping_key in hierarchy:
        hierarchy_for_group = hierarchy[grouping_key]
      else:
        # create hash and insert into parent hash
        hierarchy_for_group = {}
        hierarchy[grouping_key] = hierarchy_for_group
      # recursive call, go down a level and group
      insert_into_hierarchy(to_insert, hierarchy_for_group, groupings, current_grouping_level + 1)


This could be called like:

input =[{'dept':1, 'age':10, 'name':'Sam'},
        {'dept':1, 'age':12, 'name':'John'},
        {'dept':2,'age':20, 'name':'Mary'},
        {'dept':2,'age':11, 'name':'Mark'},
        {'dept':2,'age':11, 'name':'Tom'}]

groups = ['dept', 'age', 'name']

hierarchical_hash = generate_hierarchical_hash(input, groups)

Code Snippets

def generate_hierarchical_hash(entries, groups):
  """Returns hierarchical hash of entries, grouped at each level by the groups"""
  hierarchy = {}
  for to_insert in entries:
    insert_into_hierarchy(to_insert, hierarchy, groups, 0)
  return hierarchy


def insert_into_hierarchy(to_insert, hierarchy, groupings, current_grouping_level):
  if current_grouping_level == len(groupings):
    # if at final level, do not do anything - protection case
    pass
  else:
    grouping_at_level = groupings[current_grouping_level]
    grouping_key = to_insert[grouping_at_level]
    is_final_group = current_grouping_level == len(groupings) - 1
    if is_final_group:
      # if at last grouping, nodes will contain lists, so create or retrieve list
      # and append value to it
      if grouping_key in hierarchy:
        list_for_group = hierarchy[grouping_key]
      else:
        # create new list and insert it into hierarchy
        list_for_group = list()
        hierarchy[grouping_key] = list_for_group

      # insert to insert into end of list
      list_for_group.append(to_insert)
    else:
      # otherwise, find or create hash for this grouping,
      if grouping_key in hierarchy:
        hierarchy_for_group = hierarchy[grouping_key]
      else:
        # create hash and insert into parent hash
        hierarchy_for_group = {}
        hierarchy[grouping_key] = hierarchy_for_group
      # recursive call, go down a level and group
      insert_into_hierarchy(to_insert, hierarchy_for_group, groupings, current_grouping_level + 1)
input =[{'dept':1, 'age':10, 'name':'Sam'},
        {'dept':1, 'age':12, 'name':'John'},
        {'dept':2,'age':20, 'name':'Mary'},
        {'dept':2,'age':11, 'name':'Mark'},
        {'dept':2,'age':11, 'name':'Tom'}]

groups = ['dept', 'age', 'name']

hierarchical_hash = generate_hierarchical_hash(input, groups)

Context

StackExchange Code Review Q#17611, answer score: 2

Revisions (0)

No revisions yet.