HiveBrain v1.2.0
Get Started
← Back to all entries
patternpythonMinor

Duplicate the previous input if zero or not a number

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
thepreviousnumberduplicateinputzeronot

Problem

I have the following code which duplicates the previous input if any entry is less than zero or NaN, except the first row of a matrix.

Is there a more efficient way to do this without nested for loops? The input values may vary and can include floats or letters. The matrix shown here is just an example; the actual table has shape 14352 x 42.

import numpy as np

# Example table; the string 'N' stands in for a non-numeric entry.
data = [[0, -1, 2],
        [7, 8.1, -3],
        [-8, 5, -1],
        ['N', 7, -1]]
# m = number of rows, n = number of columns.
m, n = np.shape(data)

# Start at row 1: the first row has no row above it and is left untouched.
for i in range(1, m):
    for j in range(n):
        # NOTE(review): `long` only exists in Python 2. On Python 3 this line
        # needs `long` removed, and the type check has to come before the
        # `< 0` comparison, because comparing a str with an int raises
        # TypeError there.
        if data[i][j] < 0 or not isinstance(data[i][j], (int, float,long)):
            # The row above was already processed, so a replacement value
            # keeps being copied down the column until a valid entry appears.
            data[i][j] = data[i-1][j]


The output is:

[[0,-1,2],
 [7,8.1,2],
 [7,5,2],
 [7,7,2]]

Solution

It's possible to solve this problem using a functional Pythonic approach a layer at a time.

First, the original code is refactored into functions for reference. The `long` type and numpy were not available on my current workstation, but neither is needed to process the example data.
A transpose helper and a substituting generator are then used to build an alternative proof of concept. It is further extended with a "first qualified value, or default to 0" rule, because the original algorithm does not clean the first row and therefore propagates an unqualified value down a column until a qualified value occurs.

I've included a demo at the end that shows why a plain list comprehension fails: it does not alter the table in place, so it cannot refer back to values it has already replaced.

#original code refactored as functions
# Sample table from the question; the negative numbers and the string 'N'
# are the cells that unnumeric_or_neg rejects.
data = [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
print("data: ", data)
def unnumeric_or_neg(z):
    """Return True when *z* has to be replaced by an earlier value.

    A value is rejected when it is not an ``int`` or ``float``, when it is
    NaN, or when it is negative.

    NaN needs its own test: ``float('nan') < 0`` is False, so without it a
    NaN would be accepted as a valid number and copied down the column.
    """
    # The type check comes first so that non-numeric values are never compared.
    if not isinstance(z, (int, float)):
        return True
    # NaN is the only float value that is not equal to itself.
    return z != z or z < 0

def clean1(data, unqal):
    """Replace rejected cells, in place, with the value from the row above.

    Args:
        data: rectangular table (list of row lists); it is modified in place.
        unqal: predicate returning True for a cell that must be replaced.

    Returns:
        None. The result is left in *data*.

    The first row is never changed because it has no row above it. Rows are
    processed top to bottom, so a replacement value keeps propagating down a
    column until a qualified cell is reached. The table size is taken from
    *data* itself rather than being fixed at 4 x 3.
    """
    # data[1:] copies only the outer list; the row objects are shared, so
    # `above` already holds the replacements made in the previous iteration.
    for above, row in zip(data, data[1:]):
        for j, value in enumerate(row):
            if unqal(value):
                row[j] = above[j]
# clean1 rewrites `data` itself, so the table is printed after the call.
clean1(data,unnumeric_or_neg)
print("result is in data", data)
# Refresh data: clean1 overwrote the table, so rebuild it for the next demos.
data = [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
#help functions and another approach
def comp_transpose(t):
    """Return the transposition of rectangular table *t* as a list of lists.

    The number of columns is taken from the first row, so every row must be
    at least that long. An empty table transposes to an empty list instead
    of raising IndexError on ``t[0]``.
    """
    if len(t) == 0:
        return []
    return [[row[j] for row in t] for j in range(len(t[0]))]

def sub_last_qal(lst, unqal, default=0):
    """Yield the items of *lst*, substituting rejected items.

    Args:
        lst: iterable of values, read once from start to end.
        unqal: predicate returning True for a value that must be replaced.
        default: value yielded for rejected items that occur before any
            qualified item has been seen (0 unless given).

    Yields:
        Each qualified item unchanged; for each rejected item, the most
        recent qualified item seen so far, or *default* if there is none.
    """
    last_qual = default
    for x in lst:
        if unqal(x):
            yield last_qual
        else:
            last_qual = x
            yield x
def clean2(data,unqal):
    """Return a cleaned copy of *data* built column by column.

    Each column is passed through sub_last_qal, the columns are transposed
    back into rows, and the first row of the result is replaced by
    ``data[0]`` itself (the same list object, not a copy). *data* is not
    modified.
    """
    filled_columns = []
    for column in comp_transpose(data):
        filled_columns.append(list(sub_last_qal(column, unqal)))
    filled_rows = comp_transpose(filled_columns)
    # Row 0 is passed through as-is; only the later rows come from the fill.
    return [data[0]] + filled_rows[1:]
# clean2 returns a new table, so its return value is what gets printed.
print("2 step similar result",clean2(data,unnumeric_or_neg))
def first_qal(lst,unqal):
    """Return the first item of *lst* that *unqal* does not reject.

    *unqal* is the negative test: it returns True for items to skip.
    If every item is rejected, or *lst* is empty, 0 is returned.
    """
    return next((item for item in lst if not unqal(item)), 0)
def clean3(data,unqal):
    """Return a cleaned copy of *data* in which the first row is cleaned too.

    Every column is prefixed with its own first qualified value (0 if it has
    none) before being run through sub_last_qal, so rejected cells at the top
    of a column receive that value. The helper prefix row is dropped again
    from the result. *data* is not modified.
    """
    seeded_columns = []
    for column in comp_transpose(data):
        seed = first_qal(column, unqal)
        seeded_columns.append(list(sub_last_qal([seed] + column, unqal)))
    # [1:] removes the row made of the prepended seed values.
    return comp_transpose(seeded_columns)[1:]
# clean3 also fills rejected cells in the first row (see its seed prefix).
cn3 = clean3(data,unnumeric_or_neg)
print("augmented first qual rule ",cn3)

def first_qal_row(data_c,unqual):
    """Return one value per column of *data_c*: that column's first_qal result."""
    firsts = []
    for vlist in comp_transpose(data_c):
        firsts.append(first_qal(vlist, unqual))
    return firsts

#list comprehensions do not support back references to results
def not_clean(data,unqal):
    """Counter-example: a rebuild of the table that does NOT clean it.

    Replacement values are read from the original *data*, never from rows
    already rebuilt, so earlier substitutions cannot carry forward.

    Note on the indices, which are kept exactly as in the demo: ``j`` is the
    position of the row within ``data[1:]`` and ``i`` is the position of the
    cell within that row, yet the lookup is ``data[i-1][j]``. *data* is not
    modified.
    """
    rebuilt = [data[0]]
    for j, y in enumerate(data[1:]):
        new_row = []
        for i, x in enumerate(y):
            new_row.append(data[i-1][j] if unqal(x) else x)
        rebuilt.append(new_row)
    return rebuilt
cn = not_clean(data,unnumeric_or_neg)
print("result is unclean", cn)
# Compare against the literal the table was refreshed to, to show that `data`
# has not been modified since then.
print("data is not changed ", data == [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]])


Output:

data:  [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
result is in data [[0, -1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
2 step similar result [[0, -1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
augmented first qual rule  [[0, 8.1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
result is unclean [[0, -1, 2], [7, 8.1, 7], [7, 5, 8.1], [-1, 7, -3]]
data is not changed  True

Code Snippets

#original code refactored as functions
# Sample table from the question; the negative numbers and the string 'N'
# are the cells that unnumeric_or_neg rejects.
data = [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
print("data: ", data)
def unnumeric_or_neg(z):
    """Return True when *z* has to be replaced by an earlier value.

    A value is rejected when it is not an ``int`` or ``float``, when it is
    NaN, or when it is negative.

    NaN needs its own test: ``float('nan') < 0`` is False, so without it a
    NaN would be accepted as a valid number and copied down the column.
    """
    # The type check comes first so that non-numeric values are never compared.
    if not isinstance(z, (int, float)):
        return True
    # NaN is the only float value that is not equal to itself.
    return z != z or z < 0

def clean1(data, unqal):
    """Replace rejected cells, in place, with the value from the row above.

    Args:
        data: rectangular table (list of row lists); it is modified in place.
        unqal: predicate returning True for a cell that must be replaced.

    Returns:
        None. The result is left in *data*.

    The first row is never changed because it has no row above it. Rows are
    processed top to bottom, so a replacement value keeps propagating down a
    column until a qualified cell is reached. The table size is taken from
    *data* itself rather than being fixed at 4 x 3.
    """
    # data[1:] copies only the outer list; the row objects are shared, so
    # `above` already holds the replacements made in the previous iteration.
    for above, row in zip(data, data[1:]):
        for j, value in enumerate(row):
            if unqal(value):
                row[j] = above[j]
# clean1 rewrites `data` itself, so the table is printed after the call.
clean1(data,unnumeric_or_neg)
print("result is in data", data)
# Refresh data: clean1 overwrote the table, so rebuild it for the next demos.
data = [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
#help functions and another approach
def comp_transpose(t):
    """Return the transposition of rectangular table *t* as a list of lists.

    The number of columns is taken from the first row, so every row must be
    at least that long. An empty table transposes to an empty list instead
    of raising IndexError on ``t[0]``.
    """
    if len(t) == 0:
        return []
    return [[row[j] for row in t] for j in range(len(t[0]))]

def sub_last_qal(lst, unqal, default=0):
    """Yield the items of *lst*, substituting rejected items.

    Args:
        lst: iterable of values, read once from start to end.
        unqal: predicate returning True for a value that must be replaced.
        default: value yielded for rejected items that occur before any
            qualified item has been seen (0 unless given).

    Yields:
        Each qualified item unchanged; for each rejected item, the most
        recent qualified item seen so far, or *default* if there is none.
    """
    last_qual = default
    for x in lst:
        if unqal(x):
            yield last_qual
        else:
            last_qual = x
            yield x
def clean2(data,unqal):
    """Return a cleaned copy of *data* built column by column.

    Each column is passed through sub_last_qal, the columns are transposed
    back into rows, and the first row of the result is replaced by
    ``data[0]`` itself (the same list object, not a copy). *data* is not
    modified.
    """
    filled_columns = []
    for column in comp_transpose(data):
        filled_columns.append(list(sub_last_qal(column, unqal)))
    filled_rows = comp_transpose(filled_columns)
    # Row 0 is passed through as-is; only the later rows come from the fill.
    return [data[0]] + filled_rows[1:]
# clean2 returns a new table, so its return value is what gets printed.
print("2 step similar result",clean2(data,unnumeric_or_neg))
def first_qal(lst,unqal):
    """Return the first item of *lst* that *unqal* does not reject.

    *unqal* is the negative test: it returns True for items to skip.
    If every item is rejected, or *lst* is empty, 0 is returned.
    """
    return next((item for item in lst if not unqal(item)), 0)
def clean3(data,unqal):
    """Return a cleaned copy of *data* in which the first row is cleaned too.

    Every column is prefixed with its own first qualified value (0 if it has
    none) before being run through sub_last_qal, so rejected cells at the top
    of a column receive that value. The helper prefix row is dropped again
    from the result. *data* is not modified.
    """
    seeded_columns = []
    for column in comp_transpose(data):
        seed = first_qal(column, unqal)
        seeded_columns.append(list(sub_last_qal([seed] + column, unqal)))
    # [1:] removes the row made of the prepended seed values.
    return comp_transpose(seeded_columns)[1:]
# clean3 also fills rejected cells in the first row (see its seed prefix).
cn3 = clean3(data,unnumeric_or_neg)
print("augmented first qual rule ",cn3)

def first_qal_row(data_c,unqual):
    """Return one value per column of *data_c*: that column's first_qal result."""
    firsts = []
    for vlist in comp_transpose(data_c):
        firsts.append(first_qal(vlist, unqual))
    return firsts

#list comprehensions do not support back references to results
def not_clean(data,unqal):
    """Counter-example: a rebuild of the table that does NOT clean it.

    Replacement values are read from the original *data*, never from rows
    already rebuilt, so earlier substitutions cannot carry forward.

    Note on the indices, which are kept exactly as in the demo: ``j`` is the
    position of the row within ``data[1:]`` and ``i`` is the position of the
    cell within that row, yet the lookup is ``data[i-1][j]``. *data* is not
    modified.
    """
    rebuilt = [data[0]]
    for j, y in enumerate(data[1:]):
        new_row = []
        for i, x in enumerate(y):
            new_row.append(data[i-1][j] if unqal(x) else x)
        rebuilt.append(new_row)
    return rebuilt
cn = not_clean(data,unnumeric_or_neg)
print("result is unclean", cn)
# Compare against the literal the table was refreshed to, to show that `data`
# has not been modified since then.
print("data is not changed ", data == [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]])
data:  [[0, -1, 2], [7, 8.1, -3], [-8, 5, -1], ['N', 7, -1]]
result is in data [[0, -1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
2 step similar result [[0, -1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
augmented first qual rule  [[0, 8.1, 2], [7, 8.1, 2], [7, 5, 2], [7, 7, 2]]
result is unclean [[0, -1, 2], [7, 8.1, 7], [7, 5, 8.1], [-1, 7, -3]]
data is not changed  True

Context

StackExchange Code Review Q#101282, answer score: 3

Revisions (0)

No revisions yet.