<?php
# Purpose: Format a tab-delimited export from CONTENTdm into a
#          DSpace bulk ingest package (Simple Archive Format).
#
# Requirements:
#  - PHP 5.3 or later, CLI, with fopen enabled (no removed ereg()/split() calls)
#  - Admin access to the CONTENTdm collection
#  - Filesystem access to the digital objects
#
# Input file layout (tab delimited):
#  - line 1: DSpace/Dublin Core labels, written as "element" or
#            "element.qualifier" (e.g. "identifier.other"). Columns whose label
#            contains "cdmfile" or "skip" are not written to the record.
#  - line 2: the matching CONTENTdm field labels (kept for reference only)
#  - line 3 onward: one record per line
#
# Output: one numbered folder per record under the project directory, each
# holding a dublin_core.xml file, a "contents" manifest and the bitstream.
#
# -------------------------------------------------------------------------
#
# --- Initial Configuration Options:

# php.ini override to attempt to detect line ending type.
# Comment it out if you don't want/need it.
# The setting is deprecated from PHP 8.1 on, so it is only applied before that.
if (version_compare(PHP_VERSION, '8.1.0', '<')) {
    ini_set('auto_detect_line_endings', 1);
}

# Test mode (on/off): on:  Prints out what WOULD happen. Does not execute
#                     off: Proceeds with creating and populating the package
$test_mode = "on";

# Location of physical items on the local filesystem
$path_to_items = "/path-to-items/";

# Location of destination project - created if it doesn't exist
$project_home = "/project_home/";

# The shortname used by CONTENTdm or any other unique project label
$collection_shortname = "uw";

# Name and location of mapped tab-delimited data file
$tab_metadata_file = "sample-export-modified.txt";

# An array of regular expression patterns; input lines matching any of them
# are excluded from output. Useful for excluding compound object records if
# flattening the object. A literal "/" inside a pattern must be escaped.
$exclude_filters = array (
    "Side *1",
    "Side *2"
);

# End Configuration Options

# Destination on the local filesystem for the DSpace bulk ingest package
$project_base_dir = "$project_home$collection_shortname";

# Create initial directories -- check for existence first
if (!file_exists($project_base_dir) && $test_mode == "off") {
    if (!mkdir($project_base_dir, 0700, TRUE)) {
        fwrite(STDERR, "Unable to create project directory $project_base_dir\n");
        exit(1);
    }
}

$filter = implode("|", $exclude_filters);

$input_file = file($tab_metadata_file);
if ($input_file === FALSE || count($input_file) < 2) {
    fwrite(STDERR, "Unable to read a label line and a mapping line from $tab_metadata_file\n");
    exit(1);
}

# A UTF-8 byte order mark would otherwise become part of the first label.
if (substr($input_file[0], 0, 3) === "\xEF\xBB\xBF") {
    $input_file[0] = substr($input_file[0], 3);
}

# file() keeps the line terminator, so strip it before splitting; otherwise
# the last label of the row would never match a lookup such as
# "identifier.other".
$drc_labels = array_map('trim', explode("\t", rtrim($input_file[0], "\r\n")));
$cdm_labels = array_map('trim', explode("\t", rtrim($input_file[1], "\r\n")));
$label_count = count($drc_labels);

# Lop the first two lines (dspace --> cdm mapping) from the array of lines
$input_file = array_slice($input_file, 2);

# Keep only non-blank lines that match none of the exclude patterns. An empty
# pattern list must not filter anything: an empty regex matches every line.
$metadata = array();
foreach ($input_file as $unf_line) {
    if (trim($unf_line) === "") {
        continue;
    }
    if ($filter !== "" && preg_match("/$filter/", $unf_line)) {
        continue;
    }
    $metadata[] = $unf_line;
}

$counter = 0;
foreach ($metadata as $line) {
    $counter++;
    $fields = explode("\t", rtrim($line, "\r\n"));

    # Make the row exactly as wide as the label row so that every label has a
    # value and array_combine() cannot fail on a short or long row.
    if (count($fields) != $label_count) {
        fwrite(STDERR, "Record $counter has " . count($fields) . " columns, expected $label_count\n");
        $fields = array_slice(array_pad($fields, $label_count, ""), 0, $label_count);
    }

    # Label => value lookup read by move_bitstreams(). Repeated labels collapse
    # to the last column here, so the XML below is built by column index.
    $drc_metadata = array_combine($drc_labels, $fields);

    # Output xml declaration and opening element
    $dc_record = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<dublin_core>\n";

    foreach ($drc_labels as $index => $key) {
        $value = $fields[$index];

        if (strpos($key, "subject") !== FALSE) {
            # Subject columns hold several terms separated by semicolons;
            # each non-empty term becomes its own dcvalue.
            foreach (explode(";", $value) as $sub) {
                $sub = trim($sub);
                if ($sub !== "") {
                    $dc_record .= dc_value_xml($key, $sub);
                }
            }
        } elseif (!preg_match("/cdmfile|skip/", $key)) {
            $dc_record .= dc_value_xml($key, $value);
        }
    }

    # Output closing element
    $dc_record .= "</dublin_core>\n";

    $item_path = "$project_base_dir/$counter";

    # Call assembly functions
    make_item_folder($item_path);
    output_dcrecord($dc_record);
    move_bitstreams($item_path);

    if ($test_mode == "on") {
        print " --- End of Record ---\n";
    }
}

# Begin Additional Functions

# Escape the five characters that are special in XML text and attributes.
# Done with plain string replacement so the input encoding is left untouched.
function xml_escape($text)
{
    return str_replace(
        array('&', '<', '>', '"', "'"),
        array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'),
        $text
    );
}

# Build one dcvalue line for a label and a value.
# $key is "element" or "element.qualifier"; a missing qualifier becomes "none".
function dc_value_xml($key, $value)
{
    $parts = explode(".", $key, 2);
    $element = $parts[0];
    $qualifier = (isset($parts[1]) && $parts[1] !== "") ? $parts[1] : "none";

    return "<dcvalue element=\"" . xml_escape($element)
        . "\" qualifier=\"" . xml_escape($qualifier) . "\">"
        . xml_escape($value) . "</dcvalue>\n";
}

# Write the Dublin Core record to dublin_core.xml inside the current item
# folder (global $item_path), or print it when test mode is not "off".
function output_dcrecord($dc_record)
{
    global $item_path, $test_mode;

    if ($test_mode == "off") {
        $metadata_handle = fopen("$item_path/dublin_core.xml", "w");
        if ($metadata_handle === FALSE) {
            fwrite(STDERR, "Unable to write $item_path/dublin_core.xml\n");
            return;
        }
        fwrite($metadata_handle, $dc_record);
        fclose($metadata_handle);
    } else {
        print "TEST dc xml: $dc_record\n";
    }
}

# Create the folder for one item, or report the path it would have in test
# mode. An existing folder is reused silently.
function make_item_folder($item_path)
{
    global $test_mode;

    if ($test_mode == "off") {
        if (!file_exists($item_path) && !mkdir($item_path, 0700, TRUE)) {
            fwrite(STDERR, "Unable to create item folder $item_path\n");
        }
    } else {
        print "TEST setting item path: Item path would be: $item_path\n";
    }
}

# Copy the bitstream named by the record's "identifier.other" value into the
# item folder and write the "contents" manifest listing it.
# Customize this to locate and copy bitstreams for a specific collection.
function move_bitstreams($item_path)
{
    global $drc_metadata, $path_to_items, $test_mode;

    $filename = isset($drc_metadata["identifier.other"])
        ? trim($drc_metadata["identifier.other"])
        : "";
    $manifest = "$filename\tbundle:ORIGINAL\n";

    if ($test_mode == "off") {
        if ($filename === "") {
            fwrite(STDERR, "No identifier.other value for $item_path; no bitstream copied\n");
            return;
        }
        # Only list the file in the manifest once it is really in the folder.
        if (!copy("$path_to_items$filename", "$item_path/$filename")) {
            fwrite(STDERR, "Unable to copy $path_to_items$filename to $item_path/$filename\n");
            return;
        }
        $contents_handle = fopen("$item_path/contents", "w");
        if ($contents_handle === FALSE) {
            fwrite(STDERR, "Unable to write $item_path/contents\n");
            return;
        }
        fwrite($contents_handle, $manifest);
        fclose($contents_handle);
    } else {
        print "TEST copy bitstreams: cp $path_to_items$filename $item_path/$filename\n";
        print "TEST manifest: $manifest\n";
    }
}
?>