Why SqlBulkCopy?
Database means different things to different people. To a lot of people (where I work), an Excel file is a database. So I recently got asked to take an Excel file and massage it into usable data on our web site. (of course automating the process weekly) The first thing many people would think to do is to create a Web Service, but to me it's overkill - especially since we work on a completely M$ network. Instead why not create a simple client side .NET executable that uploads a file that the server can dump into a database?
Client Side
On the client side we're doing the following:
Just make sure the sample Excel file, created with OpenOffice.org 2.1, is located in the same directory as the .NET executable. The best part is that System.Data.Odbc does not require M$ Office to read Excel.
Example .NET code to get this done, comments inline:
using System;
using System.Data;
using System.Data.Odbc;
using System.IO;
using System.Net;
using System.Reflection;
using System.Runtime.Serialization.Formatters.Binary;
public class bc {
public static void Main() {
try {
bc b = new bc();
Console.WriteLine(b.upload_ds());
}
catch (Exception e) { Console.WriteLine(e.StackTrace); }
}
// ========================================
// constructor/public methods
// ========================================
public bc() {
try {
_init();
// "sheet1/2"; each excel sheet like a database 'table', and
// used to fill dataset with datatables
_fill_dt(_order, "sheet1", new DataTable("sbc_test_order") );
_fill_dt(_product, "sheet2", new DataTable("sbc_test_product") );
}
catch { throw; }
}
// upload the data
public string upload_ds() {
string err_url = String.Format("Upload error: {0}\n", _upload_url);
try {
// stupid, but necessary
_ds.RemotingFormat = SerializationFormat.Binary;
// else this becomes meaningless - serialized as XML
BinaryFormatter bf = new BinaryFormatter();
using (Stream s = new FileStream
( _serial_ds, FileMode.Create, FileAccess.Write, FileShare.None) )
{
bf.Serialize(s, _ds);
}
// upload; .aspx page does single Response.Write
return System.Text.Encoding.ASCII.GetString(
new WebClient().UploadFile(_upload_url, "POST", _serial_ds)
);
}
catch (System.Net.WebException e) {
string err = err_url + e.Message;
if (e.InnerException != null) err += e.InnerException;
return err;
}
catch (Exception e) { return err_url + e.StackTrace; }
}
// ========================================
// private methods
// ========================================
// sanity check; make sure excel file exists
private void _init() {
_app_dir =
Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
if ( !File.Exists( Path.Combine(_app_dir, _xls) ) )
throw new Exception(String.Format("{0} doesn't exist", _xls));
}
// dump excel data into dataset
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// SqlBulkCopy may **CRASH** sql server express 2005. (full versions??)
// also gives **USELESS** exception messages: http://tinyurl.com/yyllm2
// double-check table schema/datatypes/field length during testing.
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
private void _fill_dt(string[] fields, string sheet, DataTable dt) {
try {
// ODBC/excel connectionstring needs (1) excel filename, (2) sheetname
string cs = String.Format(
@"Driver={{Microsoft Excel Driver (*.xls)}};
DriverId=790;Dbq={0};DefaultDir={1}",
_xls, _app_dir
);
using (OdbcConnection c = new OdbcConnection(cs)) {
string cmd_txt = String.Format(
@"SELECT {0} FROM [{1}$]", _field_names(fields), sheet
);
using (OdbcCommand cmd = new OdbcCommand(cmd_txt, c)) {
using (OdbcDataAdapter da = new OdbcDataAdapter(cmd)) {
da.Fill(dt);
}
_ds.Tables.Add(dt);
}
}
}
catch { throw; }
}
// excel column names have spaces, database **DOES NOT**; format
private string _field_names(string[] s) {
for (int i = 0; i < s.Length; ++i)
s[i] = String.Format("[{0}]", s[i]);
return String.Join(",", s);
}
// ========================================
// rest of class private members
// ========================================
// directory where this executable resides
private string _app_dir;
// excel file must be in same directory as this executable!
private readonly string _xls = "test.xls";
// ========================================
// WebClient is used to send/upload the data
// ========================================
// ASP.NET page receiving data
private readonly string _upload_url = @"http://localhost/cs/bc.aspx";
// WebClient.UploadFile() needs a **file name**; we're creating
// a serialized DataSet, which the SqlBulkCopy object in the
// ASP.NET page needs to to dump the data into the database
private readonly string _serial_ds = "sql_bc.serialized";
// the dataset and collection of datatable objects. each array
// below stores table fields/column names, which need to be
// parsed **before** setting OdbcCommand.CommandText. some column
// names in the excel file have spaces in them, but the database
// tables **DO NOT**; see _field_names() method above
private DataSet _ds = new DataSet(NAME_THE_DATASET);
private readonly string[] _order = {
"order id", "product id", "quantity"
};
private readonly string[] _product = {
"product id", "product name"
};
// ****************************************
// END
// ****************************************
}
Hope you caught the comments regarding SQL Server 2005 Express. It crashed four or five times before I figured out what was going on...
Server Side
On the server side we're doing the following:
-
Accepting the file upload and deserializing the
DataSet, if it meets certain criteria. Although omitted for brevity in the code below, you should set a password, check file size and where the upload is coming from, or do whatever you feel necessary to secure access to the page.
-
Iterating over each
DataTable in the DataSet, mapping DataTable columns to SQL Server table columns so that data is copied correctly from source to destination. This step isn't necessary in our example since the Excel file and database tables share the same number of columns and positions, but in real life you probably won't have this luxury. (I didn't)
- Use the .NET 2.0 SqlBulkCopy class, which provides functionality similar to the SQL Server bcp utility, to copy the data in each
DataTable to it's respective database table. See the last section of this MSDN article for claims on how good SqlBulkCopy performs.
The example ASP.NET page to get this done, comments inline:
<%@ page language='c#' autoeventwireup='false' %>
<%@ import namespace='System.Data' %>
<%@ import namespace='System.Data.SqlClient' %>
<%@ import namespace='System.IO' %>
<%@ import namespace='System.Runtime.Serialization.Formatters.Binary' %>
<script runat='server'>
protected override void OnLoad(EventArgs e) {
// results for client (.NET executable) logging
Response.Write( _process_upload() );
}
private readonly string _database = "pubs.dbo.";
private readonly string _cs =
ConfigurationManager.ConnectionStrings["YOUR_KEY"].ConnectionString;
// deserialize uploaded dataset
private string _process_upload() {
string results = "upload succeded:\n";
try {
/*
sanity checks
*/
using ( Stream s = Request.Files[0].InputStream ) {
BinaryFormatter bf = new BinaryFormatter();
DataSet ds = (DataSet) bf.Deserialize(s);
_add_dataset(ds);
// show number of records processed for each table
foreach (DataTable dt in ds.Tables) {
results += String.Format("{0}: {1} records processed\n",
dt.TableName, dt.Rows.Count
);
}
}
}
catch (Exception e) { return e.Message + "\n" + e.StackTrace; }
// success!!
return results;
}
// we (a) dump **entire** dataset, or (b) make no changes to database
private void _add_dataset(DataSet ds) {
DataTableCollection dtc = ds.Tables;
using (SqlConnection c = new SqlConnection(_cs)) {
c.Open();
using ( SqlTransaction t = c.BeginTransaction() ) {
try {
foreach (DataTable dt in dtc) {
// clear database tables before inserting new records
string delete_sql = String.Format("DELETE FROM {0}{1}",
_database, dt.TableName
);
using (SqlCommand cmd = new SqlCommand(delete_sql, c)) {
cmd.Transaction = t;
cmd.ExecuteNonQuery();
}
}
// bulk copy; iterate over each datatable in the dataset
using (SqlBulkCopy bc = new SqlBulkCopy(
c, SqlBulkCopyOptions.TableLock, t
)) {
foreach (DataTable dt in dtc) {
_map_datatable(bc, dt);
bc.WriteToServer(dt);
// required when processing multiple bulk copy operations,
// **after** each WriteToServer() method call
bc.ColumnMappings.Clear();
}
}
t.Commit();
}
catch {
t.Rollback();
throw;
}
}
}
}
// dataset-datatable mapping => sql server
private void _map_datatable(SqlBulkCopy bc, DataTable dt) {
// datatables names must be **EXACTLY** same as SQL server tables
bc.DestinationTableName = _database + dt.TableName;
foreach (DataColumn dc in dt.Columns) {
// build SQL server column names:
string field_name = Regex.Replace(
dc.ColumnName.ToLower(), // 1. lowercase
@"[^a-z]+", // alpha characters - 'words'
"_" // separated by underscore
);
// map datatable columns to sql server columns
bc.ColumnMappings.Add(dc.ColumnName, field_name);
}
}
// ****************************************
// END
// ****************************************
</script>
Rounding it Out
The database schema; change the USE statement if you want to use another database:
USE pubs;
CREATE TABLE sbc_test_order (
order_id integer PRIMARY KEY NOT NULL,
product_id integer,
quantity integer
);
CREATE TABLE sbc_test_product (
product_id integer PRIMARY KEY NOT NULL,
product_name varchar(100)
);