思路
- 获取HBase连接,并通过连接获取要操作的 `Table` 对象(数据的读取和删除都通过 `Table` 完成;`HBaseAdmin`/`Admin` 只用于建表、删表等管理操作,本任务并不需要)。
- 遍历表中的所有行,对于每一行,获取该行的所有列族和列的所有版本数据。
- 比较每个版本数据的时间戳与特定时间(2023-01-01 00:00:00),如果时间戳早于特定时间,则删除该版本的数据。
- 操作完成后关闭HBase连接。
关键代码片段(Java示例)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Deletes, from the HBase table {@code table1}, every cell version whose
 * timestamp is earlier than {@code 2023-01-01 00:00:00}.
 *
 * <p>The cutoff string is parsed with {@link SimpleDateFormat}, which uses the
 * JVM's default time zone, so the exact cutoff instant depends on where the
 * program runs.
 */
public class HBaseDeleteOldData {
    /** Name of the table to clean up. */
    private static final String TABLE_NAME = "table1";

    /** Cutoff time; cell versions strictly older than this are deleted. */
    private static final String SPECIFIC_TIME = "2023-01-01 00:00:00";

    /** Pattern used to parse {@link #SPECIFIC_TIME}. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Scans the whole table and issues one delete per row that contains cells
     * older than the cutoff. I/O and parse failures are printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // Only data operations are performed, so no Admin handle is opened.
        // try-with-resources closes the table and connection even if the loop throws.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf(TABLE_NAME))) {
            Date specificDate = new SimpleDateFormat(TIME_PATTERN).parse(SPECIFIC_TIME);
            long cutoffMillis = specificDate.getTime();

            Scan scan = new Scan();
            // A default Scan returns only the newest version of each column; old
            // versions hidden behind a newer one would never be seen.
            // NOTE(review): readAllVersions() is the HBase 2.x+ API; on HBase 1.x
            // use scan.setMaxVersions() instead.
            scan.readAllVersions();
            // Let the region servers filter: only cells with a timestamp in
            // [0, cutoff) are sent back to the client.
            scan.setTimeRange(0L, cutoffMillis);

            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    Delete delete = buildDeleteForOldCells(result, cutoffMillis);
                    if (delete != null) {
                        table.delete(delete);
                    }
                }
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the delete for one scanned row.
     *
     * @param result       a row returned by the scanner
     * @param cutoffMillis cutoff in epoch milliseconds; cells with a smaller
     *                     timestamp are scheduled for deletion
     * @return a {@link Delete} covering the row's old cells, or {@code null}
     *         when the row has none
     */
    private static Delete buildDeleteForOldCells(Result result, long cutoffMillis) {
        Delete delete = new Delete(result.getRow());
        boolean hasOldCells = false;
        for (Cell cell : result.rawCells()) {
            long timestamp = cell.getTimestamp();
            if (timestamp < cutoffMillis) {
                // addColumns removes every version of the column at or before
                // `timestamp`; all of those are older than the cutoff as well.
                delete.addColumns(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell), timestamp);
                hasOldCells = true;
            }
        }
        // A Delete that names no column would remove the ENTIRE row, so it must
        // never be submitted for a row without old cells.
        return hasOldCells ? delete : null;
    }
}