背景:
最近遇到一个需求,用户从系统中导出一张Excel数据表,需要对表进行筛选、去重、数据分割、转置为一列数据。
具体的需求:根据公司名称筛选数据,得出内容1的数据,并将内容1去重,分割字符串,转置为一列数据。表格数据一般不超过1w行。
数据图如下:
解决方案:
因为只是对表格操作,对于用户来说,最简单的操作还是直接使用Excel进行操作,所以选择VBA解决。
文件结构如下:VBA筛选去重分割转置数据文件夹下,运行程序:Demo.xlsm,源数据存放位置:Start_Source
完成存储位置:Finish_Result,源数据表:Source.xlsx,完成表:finish_Result.xlsx
Demo程序界面:
用户操作:输入筛选条件[公司名称],需要转置的列名,点击[执行],运行完成后,得出finish_Result.xlsx
VBA代码:
Option Explicit '强制检查,未声明变量不允许使用
Private Sub run_Click()
On Error Resume Next
Application.ScreenUpdating = False '关闭屏幕刷新
Dim time_Start As Date, time_End As Date, time_Count As Date
time_Start = Time
'提取输入的筛选条件文本titleText,companyText并获取文本长度len_companyText
Dim titleText As String, companyText As String, len_companyText As Integer
titleText = Title_Text.Text
companyText = Company_Text.Text
len_companyText = Len(companyText)
'获取当前执行程序文件路径
Dim current_pathName As String
current_pathName = ThisWorkbook.Path
'定义程序执行完成,文件存储路径
Dim finish_pathName As String
finish_pathName = current_pathName & "\" & "Finish_Result"
'判断存储路径是否有Finish文件夹,如果没有,创建Finish
If Dir(finish_pathName, vbDirectory) = "" Then
MkDir (finish_pathName)
End If
'定义源数据路径
Dim source_pathName As String
source_pathName = current_pathName & "\" & "Start_Source"
'定义源数据表单,如果源数据表不存在,程序停止执行
Dim source_fileName As String, sf_exist As String
source_fileName = source_pathName & "\" & "Source.xlsx"
sf_exist = Dir(source_fileName)
If sf_exist = "" Then
Dim nMsg As Long
nMsg = MsgBox("源数据表不存在,程序结束!", vbOKOnly, "提示")
If nMsg = vbOK Then Exit Sub
End If
'读取源数据表单
Dim source_wb As Workbook, source_ws As Worksheet
Set source_wb = Workbooks.Open(source_fileName)
Set source_ws = source_wb.Worksheets("Source_Sheet")
'定义源数据表单总行数row_Count,总列数col_Count
Dim row_Count As Integer, col_Count As Integer
row_Count = source_ws.UsedRange.Rows.Count
col_Count = source_ws.UsedRange.Columns.Count
'将获取到的数据写入数组arr
Dim arr() As String
Dim i As Integer, j As Integer
For i = 1 To col_Count
For j = 1 To row_Count
ReDim Preserve arr(0 To row_Count - 1, 0 To col_Count - 1) As String
arr(j - 1, i - 1) = source_ws.Cells(j, i).Value
Next j
Next i
'定义数组表头的边界,上界 Lb ,下界Ub
Dim Lb As Integer, Ub As Integer
Lb = LBound(arr, 2)
Ub = UBound(arr, 2)
'定义数组表头title_Data,根据表头数据确定取值范围的两列在数组中的索引
Dim title_Data As String
Dim ai As Integer, bi As Integer
For i = Lb To Ub
title_Data = arr(0, i)
If title_Data = "公司名称" Then
ai = i
End If
If title_Data = titleText Then
bi = i
End If
Next i
'根据输入的icompanyText筛选值与数组iContent对比,相同的取值jContent,存入字典i_dict去重
Dim iContent As String, jContent As String, i_dict As Object
Set i_dict = CreateObject("scripting.dictionary")
For i = 1 To row_Count - 1
iContent = arr(i, ai)
iContent = Left(iContent, len_companyText)
If iContent = companyText Then
jContent = arr(i, bi)
i_dict(jContent) = ""
End If
Next i
' 创建写入数据的新表
Dim new_fileName As String, new_wb As Object, new_ws As Object
new_fileName = finish_pathName & "\" & "finish_Result.xlsx"
Set new_wb = Workbooks.Add
Set new_ws = new_wb.Worksheets("Sheet1")
Application.DisplayAlerts = False
new_wb.SaveAs Filename:=new_fileName, FileFormat:=xlOpenXMLWorkbook, CreateBackup:=False
new_wb.Close
Application.DisplayAlerts = True
Set new_wb = Nothing
Set new_ws = Nothing
Dim finish_wb As Workbook, finish_ws As Worksheet
Set finish_wb = Workbooks.Open(new_fileName)
Set finish_ws = finish_wb.Worksheets("Sheet1")
'遍历字典,分割字符串,转置为一列
Dim i_str, mut_arr() As String, a As Integer, b As Integer, mutarr_Count As Integer, id_x As Integer
b = 1
For Each i_str In i_dict.keys
mut_arr = Split(i_str, " | ")
'定义mutarr_Count为分割数组mut_arr的字符串个数
mutarr_Count = (UBound(mut_arr) - LBound(mut_arr)) + 1
'根据分割字符串数组下标进行循环,起始下标为0
For a = 0 To mutarr_Count - 1
'将分割的字符依次写入新表Sheet1的A列单元格中
finish_ws.Range("A" & CStr(b)).Value = mut_arr(a)
b = b + 1
Next a
Next
'保存表格数据
Application.DisplayAlerts = False
finish_wb.SaveAs Filename:=new_fileName, FileFormat:=xlOpenXMLWorkbook, CreateBackup:=False
Application.DisplayAlerts = True
'执行完毕,关闭源数据表,关闭存储数据表,释放对象实例
Application.DisplayAlerts = False
source_wb.Close
finish_wb.Close
Application.DisplayAlerts = True
Set finish_ws = Nothing
Set finish_wb = Nothing
Set i_dict = Nothing
Set source_ws = Nothing
Set source_wb = Nothing
time_End = Time
time_Count = time_End - time_Start
Application.ScreenUpdating = True '开启屏幕刷新
MsgBox time_Count 'demo测试运行时间计时
End Sub
Private Sub Company_Text_Change()
End Sub
Private Sub Title_Text_Change()
End Sub
具体解释看注释,其中注意点:
为保证VBA的性能,尽可能减少OLE引用,少用Range,多用数组,关闭屏幕刷新,提高运行效率。
实测1000条类似数据,花费时间1.5s左右,基本满足用户需求。