HiveBrain v1.2.0
Get Started
← Back to all entries
patternswiftMinor

Search a string for barcodes but ignore certain characters

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
ignoresearchbarcodesbutforcharacterscertainstring

Problem

I had to write a string search function that would search for barcodes, and be forgiving of minor formatting differences. For example, the barcode may be entered as YF 1942-AB in the customer's database, but a barcode scanner may read the barcode as YF1942AB.

The goal is to get the correct substring indices (range) for highlighting the text on an element, such as:

let text = "Serial: YF 1942-B (Scanned)"
let range = text.rangeOfStringWithIgnoredCharacterSet("YF1942B")
print(range) // (8, 16)


The approach I took was to do a regular string search but just ignore certain characters, and "sanitize" the search string before doing the search in earnest.

```
import Foundation

extension String {

subscript (i: Int) -> Character {
return self[self.startIndex.advancedBy(i)]
}

subscript (i: Int) -> String {
return String(self[i] as Character)
}

subscript (r: Range) -> String {
let start = startIndex.advancedBy(r.startIndex)
let end = start.advancedBy(r.endIndex - r.startIndex)
return self[Range(start .. Range? {
let newSearch = search.componentsSeparatedByCharactersInSet(ignoredCharacterSet).joinWithSeparator("")
if newSearch.isEmpty {
return nil
}
var i = 0
while i self.characters.count - i {
break
}
var startIndex = -1
var endIndex = 0
var j = 0
while j = self.characters.count {
break
}
let c: String = self[k]
if c.rangeOfCharacterFromSet(ignoredCharacterSet) != nil {
i += 1
} else if c.compare(newSearch[j], options: options) == .OrderedSame {
if startIndex == -1 {
startIndex = k
}
endIndex = k
j += 1
} else {
break

Solution

edit: The first time I read your code I mis-interpreted what was happening because of the signature of the function, and I suggested an approach that you were already using ¯_(ツ)_/¯

I like managing the loop using String.Index instead of Int because it saves a lot of casting you'd need to do otherwise (this is all in Swift 3).

let text = "hello"
var position = text.startIndex
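while position < text.endIndex {
    let character = text[position] // get substring at range
    print(character)
    position = text.index(after: position) // increment index by 1
}
// -> prints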
// h
// e
// l
// l
// o


Here's my approach that I hope gives you some inspiration or ideas!

edit: updated to loop over unicode scalars so it's easier to use exclusions.contains(currentCharacter)

extension String {

    /// Finds the first occurrence of `target` in the receiver, skipping over any
    /// scalars of the receiver that belong to `exclusions` while a match is in progress.
    ///
    /// Comparison is done scalar by scalar and is case-sensitive. A match always begins on a
    /// scalar equal to the first scalar of `target`; excluded scalars are only skipped
    /// *between* matched scalars, so the returned range never starts or ends on a skipped one.
    ///
    /// - Parameters:
    ///   - target: The text to look for. It is compared as given (it is not stripped of
    ///     excluded scalars).
    ///   - exclusions: Scalars of the receiver that may appear inside a match without
    ///     breaking it.
    /// - Returns: The range in the receiver covering the match (including any skipped
    ///   scalars inside it), or `nil` when `target` is empty, longer than the receiver,
    ///   or not found.
    func range(of target: String, ignoring exclusions: CharacterSet) -> Range<String.Index>? {

        let haystack = unicodeScalars
        let needle = target.unicodeScalars

        // An empty target has nothing to match; subscripting it below would trap.
        guard !needle.isEmpty else { return nil }

        // Last position at which a match could still fit. `limitedBy` yields nil when the
        // target is longer than the receiver instead of trapping on an out-of-bounds index.
        guard let limit = haystack.index(haystack.endIndex,
                                         offsetBy: -needle.count,
                                         limitedBy: haystack.startIndex) else {
            return nil
        }

        var startPosition = haystack.startIndex

        // `<=` so that a match ending exactly at the end of the receiver is considered.
        while startPosition <= limit {

            var targetPosition = needle.startIndex
            var currentPosition = startPosition

            while currentPosition < haystack.endIndex {

                // read the current scalar, then step past it
                let currentCharacter = haystack[currentPosition]
                currentPosition = haystack.index(after: currentPosition)

                if currentCharacter == needle[targetPosition] {
                    // matched one more scalar of the target
                    targetPosition = needle.index(after: targetPosition)

                    if targetPosition == needle.endIndex {
                        // whole target matched; report it if both ends fall on
                        // character boundaries of the receiver
                        if let start = startPosition.samePosition(in: self),
                            let end = currentPosition.samePosition(in: self) {
                            return start..<end
                        }
                        // otherwise give up on this start so the exhausted target
                        // is never subscripted
                        break
                    }
                }
                else if targetPosition == needle.startIndex || !exclusions.contains(currentCharacter) {
                    // either the candidate did not even begin with the target's first
                    // scalar, or a scalar that may not be ignored interrupted the match
                    break
                }
            }

            // Always retry from the scalar right after the previous start; this visits
            // every candidate and guarantees the outer loop terminates.
            startPosition = haystack.index(after: startPosition)
        }
        return nil
    }
}


Testing

// Example: find a scanned barcode inside free-form text, letting any
// non-alphanumeric separators in the text be skipped during the match.
let messy = "Serial: YF 1942-B (Scanned)"
let separators = CharacterSet.alphanumerics.inverted

if let range = messy.range(of: "YF1942B", ignoring: separators) {
    // -> Range<String.Index>(8, 17)

    messy.substring(with: range)
    // -> "YF 1942-B"
}

Code Snippets

// Walk a string one character at a time using String.Index rather than Int offsets.
let text = "hello"
var position = text.startIndex
while position != text.endIndex {
    print(text[position])             // character at the current index
    text.formIndex(after: &position)  // step the index forward in place
}
// -> prints
// h
// e
// l
// l
// o
extension String {

    /// Finds the first occurrence of `target` in the receiver, skipping over any
    /// scalars of the receiver that belong to `exclusions` while a match is in progress.
    ///
    /// Comparison is done scalar by scalar and is case-sensitive. A match always begins on a
    /// scalar equal to the first scalar of `target`; excluded scalars are only skipped
    /// *between* matched scalars, so the returned range never starts or ends on a skipped one.
    ///
    /// - Parameters:
    ///   - target: The text to look for. It is compared as given (it is not stripped of
    ///     excluded scalars).
    ///   - exclusions: Scalars of the receiver that may appear inside a match without
    ///     breaking it.
    /// - Returns: The range in the receiver covering the match (including any skipped
    ///   scalars inside it), or `nil` when `target` is empty, longer than the receiver,
    ///   or not found.
    func range(of target: String, ignoring exclusions: CharacterSet) -> Range<String.Index>? {

        let haystack = unicodeScalars
        let needle = target.unicodeScalars

        // An empty target has nothing to match; subscripting it below would trap.
        guard !needle.isEmpty else { return nil }

        // Last position at which a match could still fit. `limitedBy` yields nil when the
        // target is longer than the receiver instead of trapping on an out-of-bounds index.
        guard let limit = haystack.index(haystack.endIndex,
                                         offsetBy: -needle.count,
                                         limitedBy: haystack.startIndex) else {
            return nil
        }

        var startPosition = haystack.startIndex

        // `<=` so that a match ending exactly at the end of the receiver is considered.
        while startPosition <= limit {

            var targetPosition = needle.startIndex
            var currentPosition = startPosition

            while currentPosition < haystack.endIndex {

                // read the current scalar, then step past it
                let currentCharacter = haystack[currentPosition]
                currentPosition = haystack.index(after: currentPosition)

                if currentCharacter == needle[targetPosition] {
                    // matched one more scalar of the target
                    targetPosition = needle.index(after: targetPosition)

                    if targetPosition == needle.endIndex {
                        // whole target matched; report it if both ends fall on
                        // character boundaries of the receiver
                        if let start = startPosition.samePosition(in: self),
                            let end = currentPosition.samePosition(in: self) {
                            return start..<end
                        }
                        // otherwise give up on this start so the exhausted target
                        // is never subscripted
                        break
                    }
                }
                else if targetPosition == needle.startIndex || !exclusions.contains(currentCharacter) {
                    // either the candidate did not even begin with the target's first
                    // scalar, or a scalar that may not be ignored interrupted the match
                    break
                }
            }

            // Always retry from the scalar right after the previous start; this visits
            // every candidate and guarantees the outer loop terminates.
            startPosition = haystack.index(after: startPosition)
        }
        return nil
    }
}
// Usage example: the scanner reads "YF1942B" while the stored text contains
// spaces and dashes; everything that is not alphanumeric may be skipped.
let messy = "Serial: YF 1942-B (Scanned)"
let scannedCode = "YF1942B"
let skippable = CharacterSet.alphanumerics.inverted

if let range = messy.range(of: scannedCode, ignoring: skippable) {
    // -> Range<String.Index>(8, 17)

    messy.substring(with: range)
    // -> "YF 1942-B"
}

Context

StackExchange Code Review Q#150117, answer score: 3

Revisions (0)

No revisions yet.