# Purpose: Format a tab delimited export from CONTENTdm into a
# dSpace Bulk Ingest package
#
# Requirements:
# - PHP 5.* CLI with fopen enabled
# - Admin access to the CONTENTdm collection
# - filesystem access to the digital objects
#
# Caveat: This is pretty ugly code. Apologies in advance.
#
# ------------------------------------------------------------------------- #
# --- Initial Configuration Options:
# php.ini override to attempt to detect line ending type
# Comment it out if you don't want/need it.
# NOTE: the auto_detect_line_endings ini setting is deprecated as of PHP 8.1.
ini_set('auto_detect_line_endings', 1);
# Test mode (on/off): on: Prints out what WOULD happen. Does not execute
# off: Proceeds with creating and populating the package
$test_mode = "on";
# Location of physical items on the local filesystem
$path_to_items = "/path-to-items/";
# Location of Destination project - Created if doesn't exist
$project_home = "/project_home/";
# The shortname used by Contentdm or any other unique project label
$collection_shortname = "uw";
# Name and location of mapped tab-delimited data file
$tab_metadata_file = "sample-export-modified.txt";
# An array of patterns to exclude from output if lines match.
# Useful for excluding compound object records if flattening the object
$exclude_filters = array
(
    "Side *1",
    "Side *2"
);
# End Configuration Options
# Destination on the local filesystem for the dSpace Bulk Ingest package
$project_base_dir = "$project_home$collection_shortname";
# Create the package root (and any missing parents) -- live runs only.
# is_dir() is used rather than file_exists() so that a regular file sitting
# at the target path is reported instead of being mistaken for the directory.
if ($test_mode == "off" && !is_dir($project_base_dir))
{
    # Re-check is_dir() after a failed mkdir() in case the directory appeared
    # in the meantime; only then treat the failure as fatal.
    if (!mkdir($project_base_dir, 0700, TRUE) && !is_dir($project_base_dir))
    {
        fwrite(STDERR, "Unable to create project directory: $project_base_dir\n");
        exit(1);
    }
}
# Join the exclusion patterns into one regex alternation. The patterns are
# regular expressions themselves, so they are not quoted here.
$filter = implode("|", $exclude_filters);

# Read the export in one go and split on any line-ending style (\r\n, \r, \n)
# so that no field or label keeps a stray line terminator attached.
$raw_export = file_get_contents($tab_metadata_file);
if ($raw_export === false)
{
    fwrite(STDERR, "Unable to read metadata file: $tab_metadata_file\n");
    exit(1);
}
# Drop a leading UTF-8 byte order mark so it does not end up in the first label
if (substr($raw_export, 0, 3) === "\xEF\xBB\xBF")
{
    $raw_export = substr($raw_export, 3);
}
$input_file = preg_split('/\r\n|\r|\n/', $raw_export);
if (count($input_file) < 2)
{
    fwrite(STDERR, "Metadata file must start with two label rows: $tab_metadata_file\n");
    exit(1);
}

# Row 1 holds the dSpace (drc) labels, row 2 the CONTENTdm labels; both are
# tab delimited. explode() replaces split(), which was removed in PHP 7.
$drc_labels = array_map('trim', explode("\t", $input_file[0]));
$cdm_labels = array_map('trim', explode("\t", $input_file[1]));

# lop the first two lines from the array of lines to get dspace --> cdm mapping
$input_file = array_slice($input_file, 2);

# Keep only the data rows that are non-blank and match no exclusion pattern.
# An empty filter list must exclude nothing (an empty regex matches everything).
$metadata = array();
foreach ($input_file as $unf_line)
{
    if (trim($unf_line) === "") { continue; }
    if ($filter !== "" && preg_match("/$filter/", $unf_line)) { continue; }
    $metadata[] = $unf_line;
}

# Characters that must be escaped inside XML text and attribute values.
# "&" comes first so the entities added afterwards are not escaped again.
$xml_search = array("&", "<", ">", "\"");
$xml_replace = array("&amp;", "&lt;", "&gt;", "&quot;");

$label_count = count($drc_labels);
$counter = 0;
foreach ($metadata as $line)
{
    $fields = explode("\t", $line);
    $field_count = count($fields);
    # array_combine() requires equally sized arrays: pad rows that are short
    # and drop (with a warning) any fields beyond the last label.
    if ($field_count < $label_count)
    {
        $fields = array_pad($fields, $label_count, "");
    } elseif ($field_count > $label_count)
    {
        fwrite(STDERR, "Warning: row has $field_count fields but only $label_count labels; extra fields ignored\n");
        $fields = array_slice($fields, 0, $label_count);
    }
    # Label => value lookup used by move_bitstreams() (e.g. identifier.other).
    # Duplicate labels collapse here, so the record below is built by walking
    # labels and fields in parallel instead.
    $drc_metadata = array_combine($drc_labels, $fields);

    # output xml declaration and opening element
    # (dcvalue element/qualifier layout as used by the DSpace Simple Archive Format)
    $dc_record = '<?xml version="1.0" encoding="UTF-8"?>' . "\n<dublin_core>\n";
    foreach ($drc_labels as $index => $key)
    {
        if ($key === "") { continue; }
        $value = $fields[$index];
        $is_subject = (strpos($key, "subject") !== false);
        # cdmfile and skip columns are never written to the record
        if (!$is_subject && preg_match("/cdmfile|skip/", $key)) { continue; }

        # A label of the form element.qualifier carries its own qualifier
        if (strpos($key, ".") !== false)
        {
            $qualifiers = explode(".", $key);
            $element = $qualifiers[0];
            $qualifier = $qualifiers[1];
        } else
        {
            $element = $key;
            $qualifier = "none";
        }
        $open_tag = "<dcvalue element=\"" . str_replace($xml_search, $xml_replace, $element)
            . "\" qualifier=\"" . str_replace($xml_search, $xml_replace, $qualifier) . "\">";

        if ($is_subject)
        {
            # Subjects arrive as one semicolon-delimited field; emit one
            # dcvalue per non-empty term.
            foreach (explode(";", $value) as $sub)
            {
                $sub = trim($sub);
                if ($sub !== "")
                {
                    $dc_record .= $open_tag . str_replace($xml_search, $xml_replace, $sub) . "</dcvalue>\n";
                }
            }
        } else
        {
            $dc_record .= $open_tag . str_replace($xml_search, $xml_replace, $value) . "</dcvalue>\n";
        }
    }
    # output closing element
    $dc_record .= "</dublin_core>\n";

    # Items are placed in sequentially numbered folders under the project base
    $counter++;
    $item_path = "$project_base_dir/$counter";
    # Call assembly functions
    make_item_folder($item_path);
    output_dcrecord($dc_record);
    move_bitstreams($item_path);
    if ($test_mode == "on") { print " --- End of Record ---\n";}
}
# Begin Additional Functions
# Write one item's Dublin Core record to "<item_path>/dublin_core.xml", or
# print it instead when test mode is active.
#
# $dc_record - the complete record text to write
# Globals read: $item_path (destination folder), $test_mode ("off" = write)
function output_dcrecord($dc_record)
{
    # $dc_record is intentionally not declared global: doing so would replace
    # the argument with whatever the global currently holds.
    global $item_path, $test_mode;
    if ($test_mode != "off")
    {
        print "TEST dc xml: $dc_record\n";
        return;
    }
    $target = "$item_path/dublin_core.xml";
    # file_put_contents() creates or truncates the file and reports failure
    # as false, so a bad path is surfaced instead of writing to a dead handle.
    if (file_put_contents($target, $dc_record) === false)
    {
        trigger_error("Unable to write Dublin Core record: $target", E_USER_WARNING);
    }
}
# Create the folder that will hold one item's files, or only report the path
# when test mode is active.
#
# $item_path - folder to create
# Globals read: $test_mode ("off" = create for real)
function make_item_folder($item_path)
{
    global $test_mode;
    # The TEST message belongs to test mode only; a live run that finds the
    # folder already present must stay silent.
    if ($test_mode != "off")
    {
        print "TEST setting item path: Item path would be: $item_path\n";
        return;
    }
    if (is_dir($item_path))
    {
        return;
    }
    # Recursive so a missing parent does not abort the run; is_dir() is
    # re-checked in case the folder appeared while mkdir() was failing.
    if (!mkdir($item_path, 0700, true) && !is_dir($item_path))
    {
        trigger_error("Unable to create item folder: $item_path", E_USER_WARNING);
    }
}
# Copy an item's bitstream into its folder and write the "contents" manifest
# naming it, or print what would be done when test mode is active.
#
# Customized code to locate and copy bitstreams into bundle goes here;
# this is the generic example: the file name is taken from the
# "identifier.other" column of the current record.
#
# $item_path - destination folder for this item
# Globals read: $drc_metadata (current record, label => value),
#               $path_to_items (source folder), $test_mode ("off" = copy)
function move_bitstreams($item_path)
{
    global $drc_metadata, $path_to_items, $test_mode;
    # trim() drops any line terminator still attached to the value
    $filename = isset($drc_metadata["identifier.other"]) ? trim($drc_metadata["identifier.other"]) : "";
    if ($filename === "")
    {
        trigger_error("No identifier.other value for item: $item_path", E_USER_WARNING);
        return;
    }
    # One manifest line per bitstream: file name, a tab, then the bundle
    $manifest = "$filename\tbundle:ORIGINAL\n";
    if ($test_mode != "off")
    {
        print "TEST copy bitstreams: cp $path_to_items$filename $item_path/$filename\n";
        print "TEST manifest: $manifest\n";
        return;
    }
    # Do not write a manifest that names a file which failed to copy
    if (!copy("$path_to_items$filename", "$item_path/$filename"))
    {
        trigger_error("Unable to copy bitstream: $path_to_items$filename", E_USER_WARNING);
        return;
    }
    if (file_put_contents("$item_path/contents", $manifest) === false)
    {
        trigger_error("Unable to write manifest: $item_path/contents", E_USER_WARNING);
    }
}
?>