杭州桐庐县国税局:删除文本文件中相同的行
来源:百度文库 编辑:偶看新闻 时间:2024/04/28 12:59:48
删除文本文件中相同的行!
如果不使用数据库,怎么样才能删除文本文件中相同的行呢?手工?!!10w行的记录你试试手工!??
下面这个脚本演示了如果删除 文本文件中 相同的行!
Const adOpenStatic = 3
Const adLockOptimistic = 3
Const adCmdText = &H0001
Set objConnection = CreateObject("ADODB.Connection")
Set objRecordSet = CreateObject("ADODB.Recordset")
Const ForReading = 1, ForWriting = 2, ForAppending = 8
strPathToTextFile = "D:\"
strFile = "done.txt"
objConnection.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
"Data Source=" & strPathtoTextFile & ";" & _
"Extended Properties=""text;HDR=NO;FMT=Delimited"""
objRecordSet.Open "Select DISTINCT * FROM " & strFile, _
objConnection, adOpenStatic, adLockOptimistic, adCmdText
Do Until objRecordSet.EOF
str = objRecordSet.Fields.Item(0).Value
Dim fso, f
Set fso = CreateObject("Scripting.FileSystemObject")
Set f = fso.OpenTextFile("D:\namelist.txt", ForAppending, True)
f.WriteLine str
f.Close
objRecordSet.MoveNext
Loop
D:\done.txt 是原始文件
D:\namelist.txt 是生成后的文件
把上面这些代码,复制到一个txt文件中,修改成你所要的功能
另存为 *.vbs 后缀名就可以了!
===================================================
如何以A、B列为条件删除相同行
我想设定用宏直接删除相同行(比如:A1、B1与A4、B4数据相同则删除A4、B4行),请大师们帮助一下。谢谢!
[CODE]
Sub test()
Dim mColl As New Collection
Dim iRow As Long, i As Long
Dim pt As Range
On Error Resume Next
With Sheet1
iRow = .[a65536].End(xlUp).Row
For i = 3 To iRow
mColl.Add CStr(.Cells(i, 1) & .Cells(i, 2)), CStr(.Cells(i, 1) & .Cells(i, 2))
If Err.Number <> 0 Then
If pt Is Nothing Then
Set pt = .Cells(i, 1)
Else
Set pt = Union(pt, .Cells(i, 1))
End If
Err.Clear
End If
Next
End With
pt.EntireRow.Select
End Sub
[/CODE]
如果数据很多的话加一句:
application.ScreenUpdating=False
[[i] 本帖最后由 HOmT398 于 2006-11-13 16:31 编辑 [/i]]
进一步请教:我需要把d列的数据也加为条件,您的代码可作怎样的变通呢?
请看我的附件。谢谢!
版主的方法很好!
进一步请教:我需要把d列的数据也加为条件,您的代码可作怎样的变通呢?
请看我的附件。谢谢! [/quote]
参考
[code]
Sub test()
Dim mColl As New Collection
Dim iRow As Long, i As Long
Dim pt As Range
Dim strKey As String
On Error Resume Next
With Sheet1
iRow = .[a65536].End(xlUp).Row
For i = 3 To iRow
strKey = CStr(.Cells(i, 1) & .Cells(i, 2) & .Cells(i, 4))
mColl.Add strKey, strKey
If Err.Number <> 0 Then
If pt Is Nothing Then
Set pt = .Cells(i, 1)
Else
Set pt = Union(pt, .Cells(i, 1))
End If
Err.Clear
End If
Next
End With
pt.EntireRow.Select
End Sub
[/code]
再改一改
[code]Sub test()
Dim mColl As New Collection
Dim iRow As Long, i As Long
Dim pt As Range
Dim strKey As String
On Error Resume Next
Application.ScreenUpdating = False
With Sheet1
iRow = .[a65536].End(xlUp).Row
For i = 3 To iRow
strKey = CStr(.Cells(i, 1) & .Cells(i, 2) & .Cells(i, 4))
If Len(strKey) > 0 Then
mColl.Add strKey, strKey
If Err.Number <> 0 Then
If pt Is Nothing Then
Set pt = .Cells(i, 1)
Else
Set pt = Union(pt, .Cells(i, 1))
End If
Err.Clear
End If
End If
Next
End With
Application.ScreenUpdating=True
pt.EntireRow.Select
End Sub[/code]
===============================================
假设源文件叫dup.txt,用以下命令行生成过滤后的unique.txt:
copy /y nul unique.txt && for /f "delims=" %a in (dup.txt) do @(findstr /b /e /c:"%a" unique.txt >nul || echo.%a>>unique.txt)
效率不高,如果数据量特别大的话,慎用。