02
Oct
11

Spring Batch HyperSQL Job Repository


This page describes the process of setting up a semi-permanent job repository that will maintain state between job invocations. No external databases will be necessary since HSQLDB is a pure java implementation of a database.

Background

The spring batch framework offers an in-memory Repository that does not persist domain objects once the batch job is complete. This is a severe limitation since:

  • Job restarts will not be possible (resume on restart functionality)
  • Can not guarantee that two job instances with the same parameters are not launched simultaneously
  • Not suitable for multi-threaded job invocations

Requirements

Solution

In order to have the job repository stored in our application we will be using HSQLDB as our database implementation and and DDLUtils to manage the schema.

Modify the pom.xml and insert the following dependencies.

pom.xml

    <dependency>
        <groupId>org.apache.ddlutils</groupId>
        <artifactId>ddlutils</artifactId>
        <version>1.0</version>
    </dependency>
        <dependency>
            <groupId>org.hsqldb</groupId>
            <artifactId>hsqldb</artifactId>
            <version>1.8.0.10</version>
        </dependency>

DDL

The following xml file is used by Apache DDLUtils to create the Spring Batch JobRepository database. When the application starts for the first time the tables will be created. Each subsequent invocation will cause the DDLUtils to check the database schema against the xml file. If the database is different then it will make the necessary alters to update the database to reflect the schema represented in the below file.

src/main/resources/ddl.xml

<?xml version="1.0"?>
<!DOCTYPE database SYSTEM "http://db.apache.org/torque/dtd/database.dtd">
<database name="testdb">

	<table name="BATCH_JOB_INSTANCE">
		<column name="JOB_INSTANCE_ID" type="BIGINT" primaryKey="true"
			autoIncrement="true"/>
		<column name="VERSION" type="BIGINT" />
		<column name="JOB_NAME" type="VARCHAR" size="100" required="true" />
		<column name="JOB_KEY" type="VARCHAR" size="32" required="true" />
		<unique name="JOB_INST_UN">
		  <unique-column name="JOB_NAME"/>
		  <unique-column name="JOB_KEY"/>
		</unique>
	</table>

    <table name="BATCH_JOB_EXECUTION">
        <column name="JOB_EXECUTION_ID" type="BIGINT" primaryKey="true"/>
        <column name="VERSION" type="BIGINT" />
	    <column name="JOB_INSTANCE_ID" type="BIGINT" required="true"/>
	    <column name="CREATE_TIME" type="TIMESTAMP" required="true"/>
	    <column name="START_TIME" type="TIMESTAMP"/>
	    <column name="END_TIME" type="TIMESTAMP"/>
	    <column name="STATUS" type="VARCHAR" size="10"/>
	    <column name="EXIT_CODE" type="VARCHAR" size="20"/>
	    <column name="EXIT_MESSAGE" type="VARCHAR" size="2500"/>
	    <column name="LAST_UPDATED" type="TIMESTAMP"/>
	    <foreign-key name="JOB_INST_EXEC_FK" foreignTable="BATCH_JOB_INSTANCE">
	       <reference foreign="JOB_INSTANCE_ID" local="JOB_INSTANCE_ID"/>
	    </foreign-key>
    </table>

    <table name="BATCH_JOB_PARAMS">
        <column name="JOB_INSTANCE_ID" type="BIGINT" required="true"/>
	    <column name="TYPE_CD" type="VARCHAR" size="6" required="true"/>
	    <column name="KEY_NAME" type="VARCHAR" size="100" required="true"/> 
	    <column name="STRING_VAL" type="VARCHAR" size="250"/> 
	    <column name="DATE_VAL" type="TIMESTAMP"/> 
	    <column name="LONG_VAL" type="BIGINT"/>
	    <column name="DOUBLE_VAL" type="DOUBLE"/>
        <foreign-key name="JOB_INST_PARAMS_FK" foreignTable="BATCH_JOB_INSTANCE">
           <reference foreign="JOB_INSTANCE_ID" local="JOB_INSTANCE_ID"/>
        </foreign-key>
    </table>

    <table name="BATCH_STEP_EXECUTION">
		<column name="STEP_EXECUTION_ID" type="BIGINT" primaryKey="true" 
		  autoIncrement="true" />
		<column name="VERSION" type="BIGINT" required="true"/>
		<column name="STEP_NAME" type="VARCHAR" size="100" required="true"/>
		<column name="JOB_EXECUTION_ID" type="BIGINT" required="true" />
		<column name="START_TIME" type="TIMESTAMP" required="true"/> 
		<column name="END_TIME" type="TIMESTAMP"/>  
		<column name="STATUS" type=" VARCHAR" size="10"/>
		<column name="COMMIT_COUNT" type="BIGINT"/> 
		<column name="READ_COUNT" type="BIGINT"/>
		<column name="FILTER_COUNT" type="BIGINT"/>
		<column name="WRITE_COUNT" type="BIGINT"/>
		<column name="READ_SKIP_COUNT" type="BIGINT"/>
		<column name="WRITE_SKIP_COUNT" type="BIGINT"/>
		<column name="PROCESS_SKIP_COUNT" type="BIGINT"/>
		<column name="ROLLBACK_COUNT" type="BIGINT"/> 
		<column name="EXIT_CODE" type="VARCHAR" size="20"/>
		<column name="EXIT_MESSAGE" type="VARCHAR" size="2500"/>
		<column name="LAST_UPDATED" type="TIMESTAMP"/>
        <foreign-key name="JOB_EXEC_STEP_FK" foreignTable="BATCH_JOB_EXECUTION">
           <reference foreign="JOB_EXECUTION_ID" local="JOB_EXECUTION_ID"/>
        </foreign-key>
    </table>

    <table name="BATCH_STEP_EXECUTION_CONTEXT">
	    <column name="STEP_EXECUTION_ID" type="BIGINT" primaryKey="true"/>
	    <column name="SHORT_CONTEXT" type="VARCHAR" size="2500" required="true"/>
	    <column name="SERIALIZED_CONTEXT" type="VARCHAR" />
        <foreign-key name="STEP_EXEC_CTX_FK" foreignTable="BATCH_STEP_EXECUTION">
           <reference foreign="STEP_EXECUTION_ID" local="STEP_EXECUTION_ID"/>
        </foreign-key>
    </table>

    <table name="BATCH_JOB_EXECUTION_CONTEXT">
		<column name="JOB_EXECUTION_ID" type="BIGINT" primaryKey="true"/>
		<column name="SHORT_CONTEXT" type="VARCHAR" size="2500" required="true"/>
		<column name="SERIALIZED_CONTEXT" type="VARCHAR"/> 
        <foreign-key name="JOB_EXEC_CTX_FK" foreignTable="BATCH_JOB_EXECUTION">
           <reference foreign="JOB_EXECUTION_ID" local="JOB_EXECUTION_ID"/>
        </foreign-key>        
    </table>
	<table name="BATCH_STEP_EXECUTION_SEQ">
		<column name="ID" type="BIGINT" primaryKey="true" autoIncrement="true" />
	</table>
	<table name="BATCH_JOB_EXECUTION_SEQ">
		<column name="ID" type="BIGINT" primaryKey="true" autoIncrement="true" />
	</table>
	<table name="BATCH_JOB_SEQ">
		<column name="ID" type="BIGINT" primaryKey="true" autoIncrement="true" />
	</table>
    
</database>

Java Code

The following file initializes the Job Repository and creates the schema based on a DDL contained in an xml configuration file. It also allows the system to gracefully shut-down the database and persist the in-memory data back to the file system after batch job completion.

src/main/java/com/test/JobRepositoryInitializer.java

package com.test;

import java.io.InputStreamReader;

import javax.sql.DataSource;

import org.apache.ddlutils.Platform;
import org.apache.ddlutils.PlatformFactory;
import org.apache.ddlutils.io.DatabaseIO;
import org.apache.ddlutils.model.Database;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;

public class JobRepositoryInitializer {
	private DataSource dataSource;
	private String configFile;
	
	/**
	 * This method reads the schema from an xml file and determines
	 * if it needs to issue DDL statements to create or modify tables
	 * already in the database.
	 * 
	 * @throws Exception
	 */
	public void checkDDL() throws Exception {
        Platform platform = PlatformFactory
        .createNewPlatformInstance(dataSource);

        // Apache DDLUtils framework is used to create/modify schema.
		Database database = new DatabaseIO().read(new InputStreamReader(
		        getClass().getResourceAsStream(configFile)));
		
		platform.alterTables(database, false);
		System.out.println("HSQLDB is ready: " + platform);
	}

	public void shutdownDatabase() {
        SimpleJdbcTemplate template = new SimpleJdbcTemplate(dataSource);
        template.update("SHUTDOWN;");
        System.out.println("HSQLDB was gracefully shutdown. ");
	}
	public void setDataSource(DataSource dataSource) {
		this.dataSource = dataSource;
	}

	public DataSource getDataSource() {
		return dataSource;
	}

	public void setConfigFile(String configFile) {
		this.configFile = configFile;
	}

	public String getConfigFile() {
		return configFile;
	}	
}

Configuration

In order to get the application to use HSQLDB and generate the JobRepository DDL Schema you need to do the following.

Configure the database

The following bean allows you to define a dataSource that will be used to connect to the HSQLDB database. The database files will be located in the src/main/resources/db folder. (This is specified in the database URL property)

src/main/resources/applicationContext.xml

<bean name="dataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource">
        <property name="driverClassName" value="org.hsqldb.jdbcDriver"/>
        <property name="url" value="jdbc:hsqldb:file:src/main/resources/db/testdb"/>
        <property name="username" value="sa"/>
        <property name="password" value=""/>
</bean>

Job Repository Configuration

Update the configuration for the job repository to look like this:

src/main/resources/applicationContext.xml

    <beans:bean id="jobRepository"
        class="org.springframework.batch.core.repository.support.JobRepositoryFactoryBean">
        <beans:property name="databaseType" value="HSQL"/>
        <beans:property name="dataSource" ref="dataSource"/>
        <beans:property name="transactionManager" ref="transactionManager" />
    </beans:bean>

Apache DDLUtil Configuration

When the application starts for the first time the system will need to create the JobRepository tables. The following Spring Bean initializes the job repository and creates tables that are defined in the
src/main/resources/applicationContext.xml

	<beans:bean id="jobRepositoryInitializer" class="com.test.JobRepositoryInitializer"
		init-method="checkDDL" destroy-method="shutdownDatabase">
		<beans:property name="dataSource" ref="dataSource" />
		<beans:property name="configFile" value="/ddl.xml" />
	</beans:bean>

Run the application

To run the job from the command line type the following:

mvn exec:java -Dexec.mainClass=org.springframework.batch.core.launch.support.CommandLineJobRunner -Dexec.args="simpleJob.xml helloWorldJob"

You will notice that if you run the job again the console will print the following:

SEVERE: Job Terminated in error: A job instance already exists and is complete for parameters={}. If you want to run this job again, change the parameters.
org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException: A job instance already exists and is complete for parameters={}. If you want to run this job again, change the parameters.

This is a successful indication that the HSQLDB is working persisting the data. Spring Batch framework is indicating that the helloWorld job invocation has already happened. More than one invocation with the same parameters is not allowed. If you want to read more about this then please read the following article.

You can also read the database script file located here:

src/main/resources/db/testdb.script

INSERT INTO BATCH_JOB_INSTANCE VALUES(0,0,'helloWorldJob','d41d8cd98f00b204e9800998ecf8427e')
INSERT INTO BATCH_JOB_EXECUTION VALUES(0,2,0,'2011-10-02 19:53:54.601000000','2011-10-02 19:53:54.617000000','2011-10-02 19:53:54.642000000','COMPLETED','COMPLETED','','2011-10-02 19:53:54.642000000')
INSERT INTO BATCH_STEP_EXECUTION VALUES(0,3,'step1',0,'2011-10-02 19:53:54.626000000','2011-10-02 19:53:54.639000000','COMPLETED',1,0,0,0,0,0,0,0,'COMPLETED','','2011-10-02 19:53:54.639000000')
INSERT INTO BATCH_STEP_EXECUTION_CONTEXT VALUES(0,'{"map":""}',NULL)
INSERT INTO BATCH_JOB_EXECUTION_CONTEXT VALUES(0,'{"map":""}',NULL)
INSERT INTO BATCH_STEP_EXECUTION_SEQ VALUES(0)
INSERT INTO BATCH_JOB_EXECUTION_SEQ VALUES(0)
INSERT INTO BATCH_JOB_SEQ VALUES(0)

HSQLDB uses this file to persist data to the file system in-between batch invocations.

That’s all for now!

About these ads

4 Responses to “Spring Batch HyperSQL Job Repository”


  1. 1 ramki
    December 26, 2011 at 12:28 pm

    Hmm, I think the jobRepositoryInitializer bean entry in the applicationContext.xml file needs to initialize the “configFile” attribute.

  2. 3 Rishi
    October 12, 2012 at 9:07 am

    Firstly great post and all works great with Hsql 1.8.

    With Hsql 2.2.8 using the DDL I get the below error.

    Caused by: org.apache.ddlutils.DatabaseOperationException: Error while executing SQL CREATE TABLE BATCH_JOB_INSTANCE
    (
    JOB_INSTANCE_ID BIGINT IDENTITY,
    VERSION BIGINT,
    JOB_NAME VARCHAR(100) NOT NULL,
    JOB_KEY VARCHAR(32) NOT NULL,
    PRIMARY KEY (JOB_INSTANCE_ID)
    )
    at org.apache.ddlutils.platform.PlatformImplBase.evaluateBatch(PlatformImplBase.java:331)
    at org.apache.ddlutils.platform.PlatformImplBase.alterTables(PlatformImplBase.java:573)
    at org.apache.ddlutils.platform.PlatformImplBase.alterTables(PlatformImplBase.java:507)
    at com.emc.ema.extractor.JobRepositoryInitializer.checkDDL(JobRepositoryInitializer.java:31)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeCustomInitMethod(AbstractAutowireCapableBeanFactory.java:1581)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1522)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1452)
    … 12 more
    Caused by: java.sql.SQLSyntaxErrorException: primary key already exist


Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s


Enter your email address to subscribe to this blog and receive notifications of new posts by email.

Join 60 other followers

October 2011
S M T W T F S
« Jul   Jan »
 1
2345678
9101112131415
16171819202122
23242526272829
3031  

Blog Stats

  • 601,808 hits

Follow

Get every new post delivered to your Inbox.

Join 60 other followers

%d bloggers like this: