Excel, at least as of version 2002, will save directly to
XML. When you pick save just choose the xml format. From that point on, you can
edit that XML file directly in Excel and when you re-save Excel will continue to
save it as XML. The point is, you should almost NEVER EVER use xls files EVER
because XLS files are not usable by anything but Excel. Since XML files
are usable by both Excel and other software you'll only confuse yourself and
others if you keep using xls files. In fact, as of Office 2003 Windows will open
Excel created XML files in Excel by default. Even better.
Here's some perl to parse an Excel XML file.
#
use strict;
use warnings;
use XML::Simple;
use Data::Dumper;
use Encode;
my $inFilename = $ARGV[0];
my $outFilename = $ARGV[1];
if (!defined($inFilename) || !defined($outFilename))
{
die ("usage: xlReader.pl infilename outfilename [encoding]\n");
}
my $g_encoding = $ARGV[2];
if (!defined($g_encoding))
{
$g_encoding = "utf8";
}
my $xml = XMLin($inFilename,
ForceArray => 1
);
my $xl = { };
# build hash of rows indexed by rowNumber
# of hash of cells indexed by columnNumber
my $lastRow = 0; # remember the bottom most row we find
my $lastColumn = 0; # remember the right most column we find
{
my $currentRow = 0;
my $rows = $xml->{'Worksheet'}->[0]->{'Table'}->[0]->{'Row'};
foreach my $row (@{$rows})
{
# figure out the row's index
my $rowIndex = $currentRow + 1;
if (defined ($row->{'ss:Index'}))
{
$rowIndex = $row->{'ss:Index'};
}
# track last row
if ($rowIndex > $lastRow) { $lastRow = $rowIndex; }
# assert if we already have this row
if (defined ($xl->{$rowIndex}))
{
die ("row $rowIndex already defined!\n");
}
# make a new empty hash for this row
$xl->{$rowIndex} = { };
my $rowHash = $xl->{$rowIndex};
my $currentCol = 0;
my $cells = $row->{'Cell'};
foreach my $cell (@{$cells})
{
# figure out the column index
my $colIndex = $currentCol + 1;
if (defined ($cell->{'ss:Index'}))
{
$colIndex = $cell->{'ss:Index'}
}
# assert if we already have this column in this row
if (defined($rowHash->{$colIndex}))
{
die "column $colIndex in row $rowIndex already defined!\n";
}
# add the cell here
if (defined ($cell->{'Data'}))
{
$rowHash->{$colIndex} = $cell->{'Data'}->[0];
# track lastCol
if ($colIndex > $lastColumn) { $lastColumn = $colIndex; }
}
$currentCol = $colIndex;
# if a cell is used across multiple cells
# copy the contents to all cells it covers
if (defined($cell->{'ss:MergeAcross'}))
{
$currentCol += $cell->{'ss:MergeAcross'};
if (defined ($cell->{'Data'}))
{
for (my $ii = 1; $ii <= $cell->{'ss:MergeAcross'}; $ii++)
{
$rowHash->{$colIndex + $ii} = $cell->{'Data'}->[0];
}
}
}
}
$currentRow = $rowIndex;
}
}
#print Dumper ($xl);
# at this point we have a hash of rows of columns so we can access any cell
# in the spreadsheet by calling GetCellValue($xl, row, col);
# GetCellValue may return undef if that cell does not exist
# You can also call GetCellValueCheck which will fail if the cell does not
# exist
#
# In the following example we will generate a text file in the following these rules
# 1) Row 1 is used to mark the start ( s ) and end ( e ) of what you want converted
# You can also designate a label column ( l ). If they are same columns you can
# specify them with a comma as in "s,l"
# 2) Row 2 is a description
# 3) Row 3 is the field name, if it is only 1 or 2 characters the description (row 1)
# will be prefixed
# 4) Row 4 is the default row. Any value not appearing below will default to the
# value in row 4
# 5) Actual data stars in row 5
#
#
my $FILE = IO::Handle->new();
my $startColumn;
my $endColumn;
my $labelColumn;
# find start, end and label columns
for (my $colIndex = 1; $colIndex < 300; $colIndex++)
{
my $value = GetCellValue($xl, 1, $colIndex);
if (defined($value))
{
# print "value=($value)\n";
if ($value =~ /(?:^|,)e(?:,|$)/i)
{
$endColumn = $colIndex;
last;
}
if ($value =~ /(?:^|,)s(?:,|$)/i)
{
$startColumn = $colIndex;
}
if ($value =~ /(?:^|,)l(?:,|$)/i)
{
$labelColumn = $colIndex;
}
}
}
if (!defined($startColumn)) { die ("could not find start column\n"); }
if (!defined($endColumn)) { die ("could not find end column\n"); }
open ($FILE, ">" . $outFilename) || die ("could not open file $outFilename\n");
# build stage data
for (my $rowIndex = 5; $rowIndex <= $lastRow; $rowIndex++)
{
print $FILE "#-------------------------------------------------------\n";
if (defined($labelColumn))
{
my $label = GetCellValueCheck($xl, $rowIndex, $labelColumn);
print $FILE "label = ", perl2output($label), "\n";
}
for (my $colIndex = $startColumn; $colIndex <= $endColumn; $colIndex++)
{
# for each column:
# grab description in row 2
# grab label in row 3
# grab default from row 4
# use default if value does not exist
my $description = GetCellValue($xl, 2, $colIndex);
my $columnLabel = GetCellValueCheck($xl, 3, $colIndex);
my $default = GetCellValueCheck($xl, 4, $colIndex);
my $value = GetCellValue($xl, $rowIndex, $colIndex);
if (!defined($value)) { $value = $default; }
# --- if the column label is too short add the cell above it
if (length($columnLabel) < 3)
{
$columnLabel = $description . $columnLabel;
}
# --- exceptions ---
if (!defined ($description)) { $description = "-???-"; }
print $FILE perl2output($columnLabel), " = ", $value,
" # ", perl2output($description), "\n";
}
print $FILE "\n";
}
close $FILE;
exit 0;
sub GetCellValueCheck
{
my ($xl, $row, $col) = @_;
my $value = GetCellValue($xl, $row, $col);
if (!defined ($value))
{
die "required cell " . ToExcelColumn($col) . "$row not defined!\n";
}
return $value;
}
sub GetCellValue
{
my ($xl, $row, $col) = @_;
if (defined($xl->{$row}) && defined($xl->{$row}->{$col}))
{
return $xl->{$row}->{$col}->{'content'};
}
return undef;
}
sub ToExcelColumn
{
my ($col) = @_;
if ($col <= 26) { return chr($col+64); }
return chr(($col - 1 / 26) + 65) . chr((($col - 1) % 26) + 65);
}
sub FromExcelColumn
{
my ($col) = @_;
my $colNdx = 0;
for (my $cc = 0; $cc < length($col); $cc++)
{
$colNdx = $colNdx * 26 + ord(uc(substr($col,$cc,1))) - 65;
}
return $colNdx + 1;
}
sub perl2output
{
my ($str) = @_;
return encode($g_encoding, $str);
}
Just as an example, given a spreadsheet that looks like this
| |
l |
s |
|
|
|
e |
| |
|
|
|
Color |
| |
Name |
Hitpoints |
Icon |
R |
G |
B |
|
default |
label |
1 |
wimpy |
255 |
255 |
255 |
| |
ogre |
10 |
wimpy |
200 |
150 |
10 |
|
|
orc |
15 |
wimpy |
207 |
160 |
15 |
|
|
dragon |
200 |
strong |
|
200 |
123 |
|
|
beholder |
150 |
strong |
|
50 |
12 |
|
|
snake |
5 |
|
|
|
|
The perl above will produce this file
#-------------------------------------------------------
label = label
Hitpoints = 1 # -???-
Icon = wimpy # -???-
ColorR = 255 # Color
ColorG = 255 # Color
ColorB = 255 # Color
#-------------------------------------------------------
label = ogre
Hitpoints = 10 # -???-
Icon = wimpy # -???-
ColorR = 200 # Color
ColorG = 150 # Color
ColorB = 10 # Color
#-------------------------------------------------------
label = orc
Hitpoints = 15 # -???-
Icon = wimpy # -???-
ColorR = 207 # Color
ColorG = 160 # Color
ColorB = 15 # Color
#-------------------------------------------------------
label = dragon
Hitpoints = 200 # -???-
Icon = strong # -???-
ColorR = 255 # Color
ColorG = 200 # Color
ColorB = 123 # Color
#-------------------------------------------------------
label = beholder
Hitpoints = 150 # -???-
Icon = strong # -???-
ColorR = 255 # Color
ColorG = 50 # Color
ColorB = 12 # Color
#-------------------------------------------------------
label = snake
Hitpoints = 5 # -???-
Icon = wimpy # -???-
ColorR = 255 # Color
ColorG = 255 # Color
ColorB = 255 # Color