GS
2012-02-25 21:57:55 UTC
Just thought this should be pulled out of "What is a class?" since it's
way off topic to persue there...
The setup I used for testing my function in Excel 2007 (necessary for
the number of rows of data)...
Sub Setup_Data_StripDupes()
Dim lCalcMode&, bEventsEnabled As Boolean
With Application
lCalcMode = .Calculation: bEventsEnabled = .EnableEvents
.Calculation = xlCalculationManual: .EnableEvents = False
.ScreenUpdating = False
End With 'Application
With Range("A1:B500000")
.Formula = "=text(randbetween(1,10^6),""0000000000000"")"
.Value = .Value
End With
With Application
.Calculation = lCalcMode: .EnableEvents = bEventsEnabled
.ScreenUpdating = True
End With 'Application
End Sub
..which generates duplicates and so ideal for the tests since we want
to find matches (or non-matches). This is a typical task when data is
'dumped' into a spreadsheet for analysis/filtering/processing.
I suppose you could do similar in VB and add the results to an array,
but you'd have to do it twice because the function requires separate
arrays be used for the items being searched for matches and the items
being checked for matches. This, then, will obviate the need for using
Excel if the output was dumped into a file, textbox, or listbox.
'''''
The function is called by the following procedure that collects some
input from the user and passes this to the function:
Sub CompareCols_FilterMatches()
Dim bSuccess As Boolean, lMatchesFound As Long
Dim vAns As Variant, vCriteria(5) As Variant, sMsg As String
'Get the label of the columns to act on
Const MSG As String = "Please enter the label of the column"
tryagain:
'Column to filter
sMsg = MSG & " to be filtered": vAns = Application.InputBox(sMsg,
Type:=2)
If vAns = False Or vAns = "" Then Beep: Exit Sub
vCriteria(0) = Range(vAns & "1:" & vAns & Cells(Rows.Count,
vAns).End(xlUp).Row).Address
'Output goes in the column being filtered unless specified otherwise
below
vCriteria(2) = UCase$(vAns)
'Column to be checked
sMsg = MSG & " to check for matches": vAns =
Application.InputBox(sMsg, Type:=2)
If vAns = False Or vAns = "" Then Beep: Exit Sub
vCriteria(1) = Range(vAns & "1:" & vAns & Cells(Rows.Count,
vAns).End(xlUp).Row).Address
'Make sure lists contain more than 1 item
If Not Range(vCriteria(0)).Cells.Count > 1 _
Or Not Range(vCriteria(1)).Cells.Count > 1 Then
sMsg = "Columns MUST have more than one value!" & vbLf & vbLf
sMsg = sMsg & "Please try again with a different set of
columns"
MsgBox sMsg, vbCritical: GoTo tryagain
End If
'Column to receive the results
sMsg = MSG & "where the new list is to go" & vbLf _
& "(Leave blank or click 'Cancel' to use column '" &
vCriteria(2) & "')"
vAns = Application.InputBox(sMsg, Type:=2)
If Not (vAns = False) And (vAns <> "") Then vCriteria(2) =
UCase$(vAns)
'Return or remove matches?
sMsg = "Do you want to return the matches found instead of removing
them?"
vAns = MsgBox(sMsg, vbYesNo + vbQuestion)
If (vAns = vbYes) Then vCriteria(3) = 1 Else vCriteria(3) = 0
'Return a unique list?
sMsg = "Do you want only unique items in the returned list?" & vbLf &
vbLf & "(No duplicates)"
vAns = MsgBox(sMsg, vbYesNo + vbQuestion) '//YES = no dupes allowed
If (vAns = vbYes) Then vCriteria(4) = 0 Else vCriteria(4) = 1
bSuccess = FilterMatches(lMatchesFound, vCriteria())
If lMatchesFound = 0 Then MsgBox "No matches found!": Exit Sub
If lMatchesFound < 0 Then
sMsg = "Both columns must have more than 1 item!"
sMsg = sMsg & vbLf & vbLf & "Please try again: specify different
columns!"
MsgBox sMsg, vbExclamation: Exit Sub
End If 'lMatchesFound < 0
If bSuccess Then
sMsg = Format(CStr(lMatchesFound), "#,##0") _
& " Matches were found"
If vAns = vbYes Then _
sMsg = sMsg & " (including non-match duplicates)"
MsgBox sMsg '//comment out if using option below
'Optional: Ask to run a process on the new list
' sMsg = sMsg & vbLf & vbLf _
' & "Do you want to process the new list?"
'
' vAns = MsgBox(sMsg, vbYesNo + vbQuestion)
' If vAns = vbYes Then
' 'Code... ('Call' a process to act on the new list)
' End If 'vAns = vbYes
Else
MsgBox "An error occured!"
End If 'bSuccess
End Sub
I'm planning to use a userform so all inputs can be retrieved via a
single dialog rather than multiple prompts.
The function...
Function FilterMatches(Matches As Long, Criteria() As Variant) As
Boolean
' Compares 2 user-specified cols and filters matches found.
' User can also specific target col to receive resulting list.
' Optionally supports returning a unique list or allow duplicates.
' Optionally supports returning matches or non-matches.
'
' Args In: Matches: ByRef var to return number of matches found to
the caller.
'
' vCriteria(): A variant array containing the filtering
parameters.
' Criteria(0) - Address of the values to be filtered
' Criteria(1) - Address of the values to check
' Criteria(2) - Label of the column to put the filtered
list
' Criteria(3) - Numeric value to determine if we return
matches or non-matches
' Criteria(4) - Numeric value to determine if we return a
unique list or allow dupes
'
' Returns: True if matches found and no error occurs;
' False if a: matches not found --OR-- error occurs;
Dim i&, j& 'as long
Dim vFilterRng, vCheckRng, vResult, vaMatches(), vaNoMatches(),
vaDataOut() 'as variant
Dim bReturnMatches As Boolean, bMatch As Boolean, bDupesAllowed As
Boolean
Dim cItemsToCheck As New Collection, sMsg$, sRngOut$ 'as string
'Load the filtering criteria
vFilterRng = Range(Criteria(0)): vCheckRng = Range(Criteria(1)):
sRngOut = Criteria(2)
bReturnMatches = (Criteria(3) = 1): bDupesAllowed = (Criteria(4) = 1)
ReDim vaMatches(UBound(vFilterRng)): ReDim
vaNoMatches(UBound(vFilterRng)): j = 0
'Load the Collection with the values to be checked.
'Collections only allow unique keys so use OERN (no need to check if
they already exist)
Set cItemsToCheck = New Collection: On Error Resume Next
For i = LBound(vCheckRng) To UBound(vCheckRng)
cItemsToCheck.Add Key:=CStr(vCheckRng(i, 1)), Item:=vbNullString
Next 'i
Err.Clear
Debug.Print Now()
'Check the Collection for matches
On Error GoTo MatchFound
For i = LBound(vFilterRng) To UBound(vFilterRng)
bMatch = False '..reset
cItemsToCheck.Add Key:=CStr(vFilterRng(i, 1)), Item:=vbNullString
If bMatch Then
If bReturnMatches Then vaMatches(j) = vFilterRng(i, 1): j = j + 1
Else
vaNoMatches(j) = vFilterRng(i, 1): j = j + 1
cItemsToCheck.Remove CStr(vFilterRng(i, 1)) '..so dupes of it
don't get counted
End If 'bMatch
Next 'i
'Initialize the return list
If bReturnMatches Then vResult = vaMatches Else vResult = vaNoMatches
'Return a list of unique values?
If Not bDupesAllowed Then
On Error GoTo UniqueList
Dim cUniqueList As New Collection
For i = LBound(vResult) To UBound(vResult)
cUniqueList.Add Key:=CStr(vResult(i)), Item:=vbNullString
Next 'i
ReDim vaDataOut(cUniqueList.Count - 1, 0): j = 0
Else
ReDim vaDataOut(UBound(vResult), 0): j = 0
End If 'Not bDupesAllowed
Err.Clear: On Error GoTo ErrExit
'Make the list to return contiguous.
For i = LBound(vaDataOut) To UBound(vaDataOut)
If Not vResult(i) = Empty Then vaDataOut(j, 0) = vResult(i): j = j
+ 1
Next 'i
If Matches > 0 Then '..only write if Matches > 0
Columns(sRngOut).ClearContents
With Range(sRngOut & "1").Resize(UBound(vaDataOut) + 1, 1)
.Value = vaDataOut
.NumberFormat = "0000000000000" '..optional
.EntireColumn.AutoFit '..optional
End With
End If 'Matches > 0
Debug.Print Now()
ErrExit:
' If bReturnMatches Then Matches = UBound(vResult) ' + 1
FilterMatches = (Err = 0): Exit Function
MatchFound:
bMatch = True: Matches = Matches + 1: Resume Next
UniqueList:
vResult(i) = Empty: Matches = Matches + 1: Resume Next
End Function 'FilterMatches()
As I said, this processes 500,000 items in under 8 secs on a XP SP2
machine with 1.66Ghz Intel Duo Core processors. You can swap out the
Now() function and use GetTickCount() in its place if you like, but I
using Now() was sufficient enough for this task.
way off topic to persue there...
The setup I used for testing my function in Excel 2007 (necessary for
the number of rows of data)...
Sub Setup_Data_StripDupes()
Dim lCalcMode&, bEventsEnabled As Boolean
With Application
lCalcMode = .Calculation: bEventsEnabled = .EnableEvents
.Calculation = xlCalculationManual: .EnableEvents = False
.ScreenUpdating = False
End With 'Application
With Range("A1:B500000")
.Formula = "=text(randbetween(1,10^6),""0000000000000"")"
.Value = .Value
End With
With Application
.Calculation = lCalcMode: .EnableEvents = bEventsEnabled
.ScreenUpdating = True
End With 'Application
End Sub
..which generates duplicates and so ideal for the tests since we want
to find matches (or non-matches). This is a typical task when data is
'dumped' into a spreadsheet for analysis/filtering/processing.
I suppose you could do similar in VB and add the results to an array,
but you'd have to do it twice because the function requires separate
arrays be used for the items being searched for matches and the items
being checked for matches. This, then, will obviate the need for using
Excel if the output was dumped into a file, textbox, or listbox.
'''''
The function is called by the following procedure that collects some
input from the user and passes this to the function:
Sub CompareCols_FilterMatches()
Dim bSuccess As Boolean, lMatchesFound As Long
Dim vAns As Variant, vCriteria(5) As Variant, sMsg As String
'Get the label of the columns to act on
Const MSG As String = "Please enter the label of the column"
tryagain:
'Column to filter
sMsg = MSG & " to be filtered": vAns = Application.InputBox(sMsg,
Type:=2)
If vAns = False Or vAns = "" Then Beep: Exit Sub
vCriteria(0) = Range(vAns & "1:" & vAns & Cells(Rows.Count,
vAns).End(xlUp).Row).Address
'Output goes in the column being filtered unless specified otherwise
below
vCriteria(2) = UCase$(vAns)
'Column to be checked
sMsg = MSG & " to check for matches": vAns =
Application.InputBox(sMsg, Type:=2)
If vAns = False Or vAns = "" Then Beep: Exit Sub
vCriteria(1) = Range(vAns & "1:" & vAns & Cells(Rows.Count,
vAns).End(xlUp).Row).Address
'Make sure lists contain more than 1 item
If Not Range(vCriteria(0)).Cells.Count > 1 _
Or Not Range(vCriteria(1)).Cells.Count > 1 Then
sMsg = "Columns MUST have more than one value!" & vbLf & vbLf
sMsg = sMsg & "Please try again with a different set of
columns"
MsgBox sMsg, vbCritical: GoTo tryagain
End If
'Column to receive the results
sMsg = MSG & "where the new list is to go" & vbLf _
& "(Leave blank or click 'Cancel' to use column '" &
vCriteria(2) & "')"
vAns = Application.InputBox(sMsg, Type:=2)
If Not (vAns = False) And (vAns <> "") Then vCriteria(2) =
UCase$(vAns)
'Return or remove matches?
sMsg = "Do you want to return the matches found instead of removing
them?"
vAns = MsgBox(sMsg, vbYesNo + vbQuestion)
If (vAns = vbYes) Then vCriteria(3) = 1 Else vCriteria(3) = 0
'Return a unique list?
sMsg = "Do you want only unique items in the returned list?" & vbLf &
vbLf & "(No duplicates)"
vAns = MsgBox(sMsg, vbYesNo + vbQuestion) '//YES = no dupes allowed
If (vAns = vbYes) Then vCriteria(4) = 0 Else vCriteria(4) = 1
bSuccess = FilterMatches(lMatchesFound, vCriteria())
If lMatchesFound = 0 Then MsgBox "No matches found!": Exit Sub
If lMatchesFound < 0 Then
sMsg = "Both columns must have more than 1 item!"
sMsg = sMsg & vbLf & vbLf & "Please try again: specify different
columns!"
MsgBox sMsg, vbExclamation: Exit Sub
End If 'lMatchesFound < 0
If bSuccess Then
sMsg = Format(CStr(lMatchesFound), "#,##0") _
& " Matches were found"
If vAns = vbYes Then _
sMsg = sMsg & " (including non-match duplicates)"
MsgBox sMsg '//comment out if using option below
'Optional: Ask to run a process on the new list
' sMsg = sMsg & vbLf & vbLf _
' & "Do you want to process the new list?"
'
' vAns = MsgBox(sMsg, vbYesNo + vbQuestion)
' If vAns = vbYes Then
' 'Code... ('Call' a process to act on the new list)
' End If 'vAns = vbYes
Else
MsgBox "An error occured!"
End If 'bSuccess
End Sub
I'm planning to use a userform so all inputs can be retrieved via a
single dialog rather than multiple prompts.
The function...
Function FilterMatches(Matches As Long, Criteria() As Variant) As
Boolean
' Compares 2 user-specified cols and filters matches found.
' User can also specific target col to receive resulting list.
' Optionally supports returning a unique list or allow duplicates.
' Optionally supports returning matches or non-matches.
'
' Args In: Matches: ByRef var to return number of matches found to
the caller.
'
' vCriteria(): A variant array containing the filtering
parameters.
' Criteria(0) - Address of the values to be filtered
' Criteria(1) - Address of the values to check
' Criteria(2) - Label of the column to put the filtered
list
' Criteria(3) - Numeric value to determine if we return
matches or non-matches
' Criteria(4) - Numeric value to determine if we return a
unique list or allow dupes
'
' Returns: True if matches found and no error occurs;
' False if a: matches not found --OR-- error occurs;
Dim i&, j& 'as long
Dim vFilterRng, vCheckRng, vResult, vaMatches(), vaNoMatches(),
vaDataOut() 'as variant
Dim bReturnMatches As Boolean, bMatch As Boolean, bDupesAllowed As
Boolean
Dim cItemsToCheck As New Collection, sMsg$, sRngOut$ 'as string
'Load the filtering criteria
vFilterRng = Range(Criteria(0)): vCheckRng = Range(Criteria(1)):
sRngOut = Criteria(2)
bReturnMatches = (Criteria(3) = 1): bDupesAllowed = (Criteria(4) = 1)
ReDim vaMatches(UBound(vFilterRng)): ReDim
vaNoMatches(UBound(vFilterRng)): j = 0
'Load the Collection with the values to be checked.
'Collections only allow unique keys so use OERN (no need to check if
they already exist)
Set cItemsToCheck = New Collection: On Error Resume Next
For i = LBound(vCheckRng) To UBound(vCheckRng)
cItemsToCheck.Add Key:=CStr(vCheckRng(i, 1)), Item:=vbNullString
Next 'i
Err.Clear
Debug.Print Now()
'Check the Collection for matches
On Error GoTo MatchFound
For i = LBound(vFilterRng) To UBound(vFilterRng)
bMatch = False '..reset
cItemsToCheck.Add Key:=CStr(vFilterRng(i, 1)), Item:=vbNullString
If bMatch Then
If bReturnMatches Then vaMatches(j) = vFilterRng(i, 1): j = j + 1
Else
vaNoMatches(j) = vFilterRng(i, 1): j = j + 1
cItemsToCheck.Remove CStr(vFilterRng(i, 1)) '..so dupes of it
don't get counted
End If 'bMatch
Next 'i
'Initialize the return list
If bReturnMatches Then vResult = vaMatches Else vResult = vaNoMatches
'Return a list of unique values?
If Not bDupesAllowed Then
On Error GoTo UniqueList
Dim cUniqueList As New Collection
For i = LBound(vResult) To UBound(vResult)
cUniqueList.Add Key:=CStr(vResult(i)), Item:=vbNullString
Next 'i
ReDim vaDataOut(cUniqueList.Count - 1, 0): j = 0
Else
ReDim vaDataOut(UBound(vResult), 0): j = 0
End If 'Not bDupesAllowed
Err.Clear: On Error GoTo ErrExit
'Make the list to return contiguous.
For i = LBound(vaDataOut) To UBound(vaDataOut)
If Not vResult(i) = Empty Then vaDataOut(j, 0) = vResult(i): j = j
+ 1
Next 'i
If Matches > 0 Then '..only write if Matches > 0
Columns(sRngOut).ClearContents
With Range(sRngOut & "1").Resize(UBound(vaDataOut) + 1, 1)
.Value = vaDataOut
.NumberFormat = "0000000000000" '..optional
.EntireColumn.AutoFit '..optional
End With
End If 'Matches > 0
Debug.Print Now()
ErrExit:
' If bReturnMatches Then Matches = UBound(vResult) ' + 1
FilterMatches = (Err = 0): Exit Function
MatchFound:
bMatch = True: Matches = Matches + 1: Resume Next
UniqueList:
vResult(i) = Empty: Matches = Matches + 1: Resume Next
End Function 'FilterMatches()
As I said, this processes 500,000 items in under 8 secs on a XP SP2
machine with 1.66Ghz Intel Duo Core processors. You can swap out the
Now() function and use GetTickCount() in its place if you like, but I
using Now() was sufficient enough for this task.
--
Garry
Free usenet access at http://www.eternal-september.org
ClassicVB Users Regroup!
comp.lang.basic.visual.misc
microsoft.public.vb.general.discussion
Garry
Free usenet access at http://www.eternal-september.org
ClassicVB Users Regroup!
comp.lang.basic.visual.misc
microsoft.public.vb.general.discussion