Thursday, June 18, 2009

Secondary indexes in HBase

Creating secondary indexes in HBase-0.19.3:

You need to enable indexing in HBase before you can create a secondary index on columns. Edit the file $HBASE_INSTALL_DIR/conf/hbase-site.xml and add the following property to it.

    <property>
        <name>hbase.regionserver.class</name>
        <value>org.apache.hadoop.hbase.ipc.IndexedRegionInterface</value>
    </property>

    <property>
        <name>hbase.regionserver.impl</name>
        <value>
        org.apache.hadoop.hbase.regionserver.tableindexed.IndexedRegionServer
        </value>
    </property>

Adding a secondary index while creating a table:

    HBaseConfiguration conf = new HBaseConfiguration();
    conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

    HTableDescriptor desc = new HTableDescriptor("test_table");

    desc.addFamily(new HColumnDescriptor("columnfamily1:"));
    desc.addFamily(new HColumnDescriptor("columnfamily2:"));

    desc.addIndex(new IndexSpecification("column1",
        Bytes.toBytes("columnfamily1:column1")));

    desc.addIndex(new IndexSpecification("column2",
        Bytes.toBytes("columnfamily1:column2")));


    IndexedTableAdmin admin = null;
    admin = new IndexedTableAdmin(conf);

    admin.createTable(desc);

Adding an index to an existing table:

    HBaseConfiguration conf = new HBaseConfiguration();
    conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

    IndexedTableAdmin admin = null;
    admin = new IndexedTableAdmin(conf);

    admin.addIndex(Bytes.toBytes("test_table"), new IndexSpecification("column2",
    Bytes.toBytes("columnfamily1:column2")));

Deleting an existing index from a table:

    HBaseConfiguration conf = new HBaseConfiguration();
    conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

    IndexedTableAdmin admin = null;
    admin = new IndexedTableAdmin(conf);

    admin.removeIndex(Bytes.toBytes("test_table"), "column2");

Reading from secondary indexed columns:

To read from a secondary index, get a scanner for the index and scan through the data.

    HBaseConfiguration conf = new HBaseConfiguration();
    conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

    IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

    // You need to specify which columns to get
    Scanner scanner = table.getIndexedScanner("column1",
        HConstants.EMPTY_START_ROW, null, null, new byte[][] {
        Bytes.toBytes("columnfamily1:column1"),
        Bytes.toBytes("columnfamily1:column2") });

    for (RowResult rowResult : scanner) {
        String value1 = new String(
            rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue());

        String value2 = new String(
            rowResult.get(Bytes.toBytes("columnfamily1:column2")).getValue());

        System.out.println(value1 + ", " + value2);
    }

    table.close();

To get a scanner over a subset of the rows, specify a column filter.

    ColumnValueFilter filter =
        new ColumnValueFilter(Bytes.toBytes("columnfamily1:column1"),

        CompareOp.LESS, Bytes.toBytes("value1-10"));

    scanner = table.getIndexedScanner("column1", HConstants.EMPTY_START_ROW,
        null,
filter, new byte[][] { Bytes.toBytes("columnfamily1:column1"),
        Bytes.toBytes("columnfamily1:column2")
);

    for (RowResult rowResult : scanner) {
        String value1 = new String(
            rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue());

        String value2 = new String(
            rowResult.get(Bytes.toBytes("columnfamily1:column2")).getValue());

        System.out.println(value1 + ", " + value2);
    }

Example Code:

import java.io.IOException;
import java.util.Date;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Scanner;
import org.apache.hadoop.hbase.client.tableindexed.IndexSpecification;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTable;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTableAdmin;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.ColumnValueFilter.CompareOp;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;

public class SecondaryIndexTest {
    public void writeToTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        String row = "test_row";
        BatchUpdate update = null;

        for (int i = 0; i < 100; i++) {
            update = new BatchUpdate(row + i);
            update.put("columnfamily1:column1", Bytes.toBytes("value1-" + i));
            update.put("columnfamily1:column2", Bytes.toBytes("value2-" + i));
            table.commit(update);
        }

        table.close();
    }

    public void readAllRowsFromSecondaryIndex() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        Scanner scanner = table.getIndexedScanner("column1",
            HConstants.EMPTY_START_ROW, null, null, new byte[][] {
            Bytes.toBytes("columnfamily1:column1"),
                Bytes.toBytes("columnfamily1:column2") });


        for (RowResult rowResult : scanner) {
            System.out.println(Bytes.toString(
                rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue())
                + ", " + Bytes.toString(rowResult.get(
                Bytes.toBytes("columnfamily1:column2")).getValue()
                ));
        }

        table.close();
    }

    public void readFilteredRowsFromSecondaryIndex() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        ColumnValueFilter filter =
            new ColumnValueFilter(Bytes.toBytes("columnfamily1:column1"),

            CompareOp.LESS, Bytes.toBytes("value1-40"));

        Scanner scanner = table.getIndexedScanner("column1",
            HConstants.EMPTY_START_ROW, null, filter,
            new byte[][] { Bytes.toBytes("columnfamily1:column1"),
                Bytes.toBytes("columnfamily1:column2")

            });

        for (RowResult rowResult : scanner) {
            System.out.println(Bytes.toString(
                rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue())
                + ", " + Bytes.toString(rowResult.get(
                Bytes.toBytes("columnfamily1:column2")).getValue()
                ));
        }

        table.close();
    }

    public void createTableWithSecondaryIndexes() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        HTableDescriptor desc = new HTableDescriptor("test_table");

        desc.addFamily(new HColumnDescriptor("columnfamily1:column1"));
        desc.addFamily(new HColumnDescriptor("columnfamily1:column2"));

        desc.addIndex(new IndexSpecification("column1",
            Bytes.toBytes("columnfamily1:column1")));
        desc.addIndex(new IndexSpecification("column2",
            Bytes.toBytes("columnfamily1:column2")));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        if (admin.tableExists(Bytes.toBytes("test_table"))) {
            if (admin.isTableEnabled("test_table")) {
                admin.disableTable(Bytes.toBytes("test_table"));
            }

            admin.deleteTable(Bytes.toBytes("test_table"));
        }

        if (admin.tableExists(Bytes.toBytes("test_table-column1"))) {
            if (admin.isTableEnabled("test_table-column1")) {
                admin.disableTable(Bytes.toBytes("test_table-column1"));
            }

            admin.deleteTable(Bytes.toBytes("test_table-column1"));
        }

        admin.createTable(desc);
    }

    public void addSecondaryIndexToExistingTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        admin.addIndex(Bytes.toBytes("test_table"),
            new IndexSpecification("column2",
            Bytes.toBytes("columnfamily1:column2")));

    }

    public void removeSecondaryIndexToExistingTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        admin.removeIndex(Bytes.toBytes("test_table"), "column2");
    }

    public static void main(String[] args) throws IOException {
        SecondaryIndexTest test = new SecondaryIndexTest();

        test.createTableWithSecondaryIndexes();
        test.writeToTable();
        test.addSecondaryIndexToExistingTable();
        test.removeSecondaryIndexToExistingTable();
        test.readAllRowsFromSecondaryIndex();
        test.readFilteredRowsFromSecondaryIndex();

        System.out.println("Done!");
    }
}
Post a Comment