patternswiftMinor
Search a string for barcodes but ignore certain characters
Viewed 0 times
ignoresearchbarcodesbutforcharacterscertainstring
Problem
I had to write a string search function that would search for barcodes, and be forgiving of minor formatting differences. For example, the barcode may be entered as `YF 1942-AB` in the customer's database, but a barcode scanner may read the barcode as `YF1942AB`.
The goal is to get the correct substring indices (range) for highlighting the text on an element, such as:
The approach I took was to do a regular string search but just ignore certain characters, and "sanitize" the search string before doing the search in earnest.
```
import Foundation
extension String {
/// Returns the character located `i` positions after the start of the string.
subscript (i: Int) -> Character {
    // Convert the integer offset into a string index first, then look it up.
    let offsetIndex = startIndex.advancedBy(i)
    return self[offsetIndex]
}
/// Returns the character at integer offset `i`, wrapped in a one-character `String`.
subscript (i: Int) -> String {
    // The type annotation selects the `Character`-returning Int subscript.
    let character: Character = self[i]
    return String(character)
}
subscript (r: Range) -> String {
let start = startIndex.advancedBy(r.startIndex)
let end = start.advancedBy(r.endIndex - r.startIndex)
return self[Range(start .. Range? {
let newSearch = search.componentsSeparatedByCharactersInSet(ignoredCharacterSet).joinWithSeparator("")
if newSearch.isEmpty {
return nil
}
var i = 0
while i self.characters.count - i {
break
}
var startIndex = -1
var endIndex = 0
var j = 0
while j = self.characters.count {
break
}
let c: String = self[k]
if c.rangeOfCharacterFromSet(ignoredCharacterSet) != nil {
i += 1
} else if c.compare(newSearch[j], options: options) == .OrderedSame {
if startIndex == -1 {
startIndex = k
}
endIndex = k
j += 1
} else {
break
YF 1942-AB in the customer's database, but a barcode scanner may read the barcode as YF1942AB.
The goal is to get the correct substring indices (range) for highlighting the text on an element, such as:
let text = "Serial: YF 1942-B (Scanned)"
let range = text.rangeOfStringWithIgnoredCharacterSet("YF1942B")
print(range) // (8, 16)
The approach I took was to do a regular string search but just ignore certain characters, and "sanitize" the search string before doing the search in earnest.
```
import Foundation
extension String {
/// Returns the character located `i` positions after the start of the string.
subscript (i: Int) -> Character {
    // Convert the integer offset into a string index first, then look it up.
    let offsetIndex = startIndex.advancedBy(i)
    return self[offsetIndex]
}
/// Returns the character at integer offset `i`, wrapped in a one-character `String`.
subscript (i: Int) -> String {
    // The type annotation selects the `Character`-returning Int subscript.
    let character: Character = self[i]
    return String(character)
}
subscript (r: Range) -> String {
let start = startIndex.advancedBy(r.startIndex)
let end = start.advancedBy(r.endIndex - r.startIndex)
return self[Range(start .. Range? {
let newSearch = search.componentsSeparatedByCharactersInSet(ignoredCharacterSet).joinWithSeparator("")
if newSearch.isEmpty {
return nil
}
var i = 0
while i self.characters.count - i {
break
}
var startIndex = -1
var endIndex = 0
var j = 0
while j = self.characters.count {
break
}
let c: String = self[k]
if c.rangeOfCharacterFromSet(ignoredCharacterSet) != nil {
i += 1
} else if c.compare(newSearch[j], options: options) == .OrderedSame {
if startIndex == -1 {
startIndex = k
}
endIndex = k
j += 1
} else {
break
Solution
edit: The first time I read your code I mis-interpreted what was happening because of the signature of the function, and I suggested an approach that you were already using ¯_(ツ)_/¯
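I like managing the loop using `String.Index` instead of `Int` because it saves a lot of casting you'd need to do otherwise (this is all in Swift 3).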
Here's my approach that I hope gives you some inspiration or ideas!
edit: updated to loop over unicode scalars so it's easier to use
Testing
I like managing the loop using
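String.Index instead of Int because it saves a lot of casting you'd need to do otherwise (this is all in Swift 3).
let text = "hello"
var position = text.startIndex
while position < text.endIndex {
    let character = text[position] // get substring at range
    print(character)
    position = text.index(after: position) // increment index by 1
}
// -> prints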
// h
// e
// l
// l
// o
Here's my approach that I hope gives you some inspiration or ideas!
edit: updated to loop over unicode scalars so it's easier to use `exclusions.contains(currentCharacter)`
extension String {
/// Finds the first occurrence of `target` in this string, tolerating scalars
/// from `exclusions` that appear between the matched scalars.
///
/// Scalars are compared exactly (case-sensitively). A match always begins on
/// the first scalar of `target`, so the returned range never starts with an
/// ignored scalar, but it does include any ignored scalars inside the match.
///
/// - Parameters:
///   - target: The text to look for, e.g. `"YF1942B"`.
///   - exclusions: Scalars in the receiver that may be skipped over while a
///     match is in progress, e.g. `CharacterSet.alphanumerics.inverted`.
/// - Returns: The range of the match in the receiver, or `nil` when `target`
///   is empty or does not occur.
func range(of target: String, ignoring exclusions: CharacterSet) -> Range<String.Index>? {
    let haystack = unicodeScalars
    let needle = target.unicodeScalars

    // An empty target has nothing to match and must never be subscripted.
    guard let firstNeedle = needle.first else { return nil }

    var startPosition = haystack.startIndex
    while startPosition < haystack.endIndex {
        // Only attempt a match where the first scalar of the target appears.
        if haystack[startPosition] == firstNeedle {
            var targetPosition = needle.startIndex
            var currentPosition = startPosition

            while currentPosition < haystack.endIndex {
                // Read the current scalar, then step past it.
                let currentCharacter = haystack[currentPosition]
                currentPosition = haystack.index(after: currentPosition)

                if currentCharacter == needle[targetPosition] {
                    // Matched one more scalar of the target.
                    targetPosition = needle.index(after: targetPosition)
                    if targetPosition == needle.endIndex {
                        // Whole target matched. The conversion fails when the match
                        // does not fall on character boundaries; give up on this
                        // start position in that case.
                        guard let start = startPosition.samePosition(in: self),
                              let end = currentPosition.samePosition(in: self) else { break }
                        return start..<end
                    }
                } else if !exclusions.contains(currentCharacter) {
                    // A scalar that neither matches nor may be ignored ends this attempt.
                    break
                }
                // Otherwise the scalar is ignorable: skip it and keep matching.
            }
        }
        // Always advance by exactly one scalar so no candidate start is skipped
        // and the outer loop is guaranteed to terminate.
        startPosition = haystack.index(after: startPosition)
    }
    return nil
}
}
Testing
let messy = "Serial: YF 1942-B (Scanned)"
if let range = messy.range(of: "YF1942B", ignoring: CharacterSet.alphanumerics.inverted) {
// -> Range<String.Index>(8, 17)
messy.substring(with: range)
// -> "YF 1942-B"
}
Code Snippets
// Walk a string one character at a time using String.Index rather than Int offsets.
let text = "hello"
var cursor = text.startIndex
while cursor != text.endIndex {
    print(text[cursor]) // the character stored at the current index
    text.formIndex(after: &cursor) // step the index forward by one character
}
// -> prints
// h
// e
// l
// l
// o
extension String {
/// Finds the first occurrence of `target` in this string, tolerating scalars
/// from `exclusions` that appear between the matched scalars.
///
/// Scalars are compared exactly (case-sensitively). A match always begins on
/// the first scalar of `target`, so the returned range never starts with an
/// ignored scalar, but it does include any ignored scalars inside the match.
///
/// - Parameters:
///   - target: The text to look for, e.g. `"YF1942B"`.
///   - exclusions: Scalars in the receiver that may be skipped over while a
///     match is in progress, e.g. `CharacterSet.alphanumerics.inverted`.
/// - Returns: The range of the match in the receiver, or `nil` when `target`
///   is empty or does not occur.
func range(of target: String, ignoring exclusions: CharacterSet) -> Range<String.Index>? {
    let haystack = unicodeScalars
    let needle = target.unicodeScalars

    // An empty target has nothing to match and must never be subscripted.
    guard let firstNeedle = needle.first else { return nil }

    var startPosition = haystack.startIndex
    while startPosition < haystack.endIndex {
        // Only attempt a match where the first scalar of the target appears.
        if haystack[startPosition] == firstNeedle {
            var targetPosition = needle.startIndex
            var currentPosition = startPosition

            while currentPosition < haystack.endIndex {
                // Read the current scalar, then step past it.
                let currentCharacter = haystack[currentPosition]
                currentPosition = haystack.index(after: currentPosition)

                if currentCharacter == needle[targetPosition] {
                    // Matched one more scalar of the target.
                    targetPosition = needle.index(after: targetPosition)
                    if targetPosition == needle.endIndex {
                        // Whole target matched. The conversion fails when the match
                        // does not fall on character boundaries; give up on this
                        // start position in that case.
                        guard let start = startPosition.samePosition(in: self),
                              let end = currentPosition.samePosition(in: self) else { break }
                        return start..<end
                    }
                } else if !exclusions.contains(currentCharacter) {
                    // A scalar that neither matches nor may be ignored ends this attempt.
                    break
                }
                // Otherwise the scalar is ignorable: skip it and keep matching.
            }
        }
        // Always advance by exactly one scalar so no candidate start is skipped
        // and the outer loop is guaranteed to terminate.
        startPosition = haystack.index(after: startPosition)
    }
    return nil
}
}
let messy = "Serial: YF 1942-B (Scanned)"
if let range = messy.range(of: "YF1942B", ignoring: CharacterSet.alphanumerics.inverted) {
// -> Range<String.Index>(8, 17)
messy.substring(with: range)
// -> "YF 1942-B"
}Context
StackExchange Code Review Q#150117, answer score: 3
Revisions (0)
No revisions yet.