1. 按固定数量拆分
- 策略阐述:将表按照固定数量划分为若干个region。比如要创建的表预计数据量较大,提前规划将其分为10个region,每个region负责一部分数据。
- 代码实现(Java):
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;
public class HBasePreSplitByFixedCount {
public static void main(String[] args) throws Exception {
org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
byte[][] splits = new byte[9][];
for (int i = 1; i < 10; i++) {
splits[i - 1] = Bytes.toBytes(i + "");
}
TableName tableName = TableName.valueOf("your_table_name");
TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
.addColumnFamily(ColumnFamilyDescriptorBuilder.of(Bytes.toBytes("cf")))
.build();
admin.createTable(tableDescriptor, splits);
admin.close();
connection.close();
}
}
# HBase shell: create the table with nine explicit split points ('1' through '9').
create 'your_table_name', 'cf', {SPLITS => ['1', '2', '3', '4', '5', '6', '7', '8', '9']}
2. 按哈希值拆分
- 策略阐述:对行键进行哈希运算,并把哈希值(或其前缀)作为实际写入的行键前缀,再将整个哈希值空间等分为若干区间,以各区间的边界作为region的拆分点。这样能较为均匀地分布数据,避免数据热点。例如对行键进行MD5哈希,根据十六进制哈希值的取值范围划分区间。
- 代码实现(Java):
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MD5Hash;
public class HBasePreSplitByHash {
public static void main(String[] args) throws Exception {
org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
int numRegions = 10;
byte[][] splits = new byte[numRegions - 1][];
for (int i = 1; i < numRegions; i++) {
byte[] hash = MD5Hash.getMD5AsHex(Bytes.toBytes(i + "")).getBytes();
splits[i - 1] = hash;
}
TableName tableName = TableName.valueOf("your_table_name");
TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
.addColumnFamily(ColumnFamilyDescriptorBuilder.of(Bytes.toBytes("cf")))
.build();
admin.createTable(tableDescriptor, splits);
admin.close();
connection.close();
}
}
# This assumes the hash split points are produced by a custom script.
# First generate the split-point file hash_splits.txt,
# with contents like: md5_hash_value_1
# md5_hash_value_2
# ...
create 'your_table_name', 'cf', {SPLITS_FILE => 'hash_splits.txt'}
3. 按行键前缀拆分
- 策略阐述:根据行键的前缀进行拆分。如果行键设计有一定规律,比如以日期作为前缀(如20230101_xxx),可以按日期范围进行拆分,每个region负责特定日期范围的数据。
- 代码实现(Java):
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;
public class HBasePreSplitByRowKeyPrefix {
public static void main(String[] args) throws Exception {
org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
byte[][] splits = new byte[3][];
splits[0] = Bytes.toBytes("20230101");
splits[1] = Bytes.toBytes("20230201");
splits[2] = Bytes.toBytes("20230301");
TableName tableName = TableName.valueOf("your_table_name");
TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
.addColumnFamily(ColumnFamilyDescriptorBuilder.of(Bytes.toBytes("cf")))
.build();
admin.createTable(tableDescriptor, splits);
admin.close();
connection.close();
}
}
# HBase shell: create the table with three date-prefix split points.
create 'your_table_name', 'cf', {SPLITS => ['20230101', '20230201', '20230301']}