Skip to content

Commit a60944d

Browse files
committed
found a small pile of old unicorn to koha migration scripts written for windows.
1 parent 69ddac5 commit a60944d

File tree

5 files changed

+2098
-0
lines changed

5 files changed

+2098
-0
lines changed
Lines changed: 352 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
#!/usr/bin/perl
2+
3+
use strict;
4+
use warnings;
5+
use MARC::Batch;
6+
## ---------------------------------------------------------------------------
## Main program.
##
## Prompts for a MARC file, converts every Unicorn 999 holdings field in each
## record into a Koha 952 items field, removes the 999 fields, and writes each
## record either to "<input>_out.mrc" or, when convert_location() flagged it,
## to "<input>_dropped.mrc".
## ---------------------------------------------------------------------------
print '
Unicorn to Koha holdings converter

This program will convert MARC holdings from SirsiDynix Unicorn GL 3.1
to something that Koha 3.0 can use. It may work on other versions of these
two programs (including SirsiDynix Symphony), but it has not been tested.
To get MARC holdings from Unicorn, use the MARCExport utility with the
"Include 999 holdings tag" option ticked.
The easiest way to use this program is to place your Unicorn MARC
file into the same directory as this program. The converted filename will
be the same as the input name, but with "_out.mrc" appended.
';

print "\nEnter name of MARC input file:\n";
my $input_marc_file = <STDIN>;
die "No input file name given\n" unless defined $input_marc_file;
chomp $input_marc_file;
die "No input file name given\n" unless length $input_marc_file;
print "\nProcessing File:\nThis may take a few minutes...\n";

my $batch = MARC::Batch->new('USMARC', $input_marc_file);

## Output names follow what the banner above promises ("_out.mrc" appended).
my $output_file  = $input_marc_file . '_out.mrc';
my $dropped_file = $input_marc_file . '_dropped.mrc';

## Three-argument open with lexical handles: the file name comes straight
## from the user, so it must never be parsed as part of the open mode.
open(my $marc_output, '>', $output_file)
    or die "Cannot open $output_file for writing: $!";
open(my $marc_dropped, '>', $dropped_file)
    or die "Cannot open $dropped_file for writing: $!";

## MARC is a binary format; binmode stops newline translation on platforms
## (such as Windows) that would otherwise alter the bytes written.
binmode $marc_output;
binmode $marc_dropped;

## Our records are crap. GIGO ;-)
$batch->strict_off();

## We'll print warnings in a more useful way than the default
$batch->warnings_off();

## Number of records and items we have iterated through
my $recordcount = 0;
my $itemcount   = 0;

## Set to 1 by convert_location() when an item's current location means the
## record isn't worth importing. It is reset once per record, so a single
## flagged item sends the whole record to the dropped file.
my $marc_drop;

while (my $record = $batch->next()) {
    $recordcount++;

    ## We import the record unless we know better
    $marc_drop = 0;

    ## Report parse warnings once per record, before touching its fields.
    ## warnings() hands back everything accumulated since the previous call,
    ## so asking here keeps each warning attached to the record that caused it
    ## (including records that have no 999 field at all).
    my @warnings = $batch->warnings();
    if (@warnings) {
        my $warning_title = $record->subfield('245', 'a');
        $warning_title = '' unless defined $warning_title;
        foreach my $warning (@warnings) {
            print STDOUT "Record $recordcount \"$warning_title\": $warning\n";
        }
    }

    ## Unicorn's 999 and Koha's 952 are repeatable, so fetch them in list context
    my @unicorn_holdings = $record->field('999');
    foreach my $unicorn_holding (@unicorn_holdings) {

        ## Load the holdings information from the proper subfields.
        ## If you store information in different subfields,
        ## this is the place to change it.
        my $full_call_number   = $unicorn_holding->subfield('a');
        my $copy_number        = $unicorn_holding->subfield('c');
        my $date_last_seen     = convert_date($unicorn_holding->subfield('d'));
        my $date_last_borrowed = convert_date($unicorn_holding->subfield('e'));
        my $barcode            = $unicorn_holding->subfield('i');

        ## In Koha, the shelving location code must be defined in Authorized
        ## Value category 'LOC' in a default installation. convert_location()
        ## remaps 999$k / 999$l to this library's Koha codes; check it to see
        ## whether you need to make adjustments for your library.
        ## To leave your locations as they are, use this line instead:
        #my $shelving_location = $unicorn_holding->subfield('l');
        my $shelving_location = convert_location(
            $unicorn_holding->subfield('k'),
            $unicorn_holding->subfield('l'),
        );

        ## Check out convert_status() to see if you need to adjust it.
        ## NOTE(review): status is derived from 999$l, which convert_location()
        ## receives as the *home* location, while LOST/MISSING read like
        ## current-location values -- confirm 'l' (not 'k') is intended.
        my $status_lost = convert_status($unicorn_holding->subfield('l'));

        ## Current location is usually the same as permanent location, but
        ## maybe not for your library. In Koha, this code must be defined in
        ## System Administration -> Libraries, Branches and Groups.
        my $permanent_location = $unicorn_holding->subfield('m');
        my $current_location   = $unicorn_holding->subfield('m');
        my $total_checkouts    = $unicorn_holding->subfield('n');

        ## In Koha, the item type code must be defined in System
        ## Administration -> Item types and Circulation Codes.
        ## Alternatives, if the combined mapping does not suit your library:
        #my $item_type = $unicorn_holding->subfield('t');                     # keep Unicorn item types as they are
        #my $item_type = convert_item_type($unicorn_holding->subfield('t'));  # map from Unicorn item type only
        my $item_type = convert_item_type(
            $unicorn_holding->subfield('t'),
            $unicorn_holding->subfield('x'),    # Unicorn item category 1
        );

        my $date_acquired = convert_date($unicorn_holding->subfield('u'));
        my $source_of_classification_or_shelving_scheme
            = convert_classification_source($unicorn_holding->subfield('w'));

        ## Collection codes are completely optional in Koha, but they are a
        ## coded value, meaning they must have a matching Authorized Value
        ## category ('CCODE' in a default installation). This maps them from
        ## Unicorn's 'Item Category 1', but that may not make sense for you;
        ## set this to undef if you don't want collection codes.
        my $collection_code = $unicorn_holding->subfield('x');

        ## I just set everything to be lend-able and undamaged
        my $not_for_loan   = '0';
        my $status_damaged = '0';

        ## 952 subfields in the order they are written. Subfields Koha knows
        ## about that are not mapped yet (email suggestions to
        ## jsherman@usao.edu): 0 withdrawn, 3 materials specified,
        ## 5 use restrictions, 6 normalized classification for sort,
        ## e source of acquisition, g purchase price, h serial enumeration /
        ## chronology, j shelving control number, m total renewals,
        ## n total holds, q checked out, u URI, v replacement price,
        ## w price effective from.
        my @candidate_subfields = (
            '1' => $status_lost,
            '2' => $source_of_classification_or_shelving_scheme,
            '4' => $status_damaged,
            '7' => $not_for_loan,
            '8' => $collection_code,
            'a' => $permanent_location,
            'b' => $current_location,
            'c' => $shelving_location,
            'd' => $date_acquired,
            'l' => $total_checkouts,
            'o' => $full_call_number,
            'p' => $barcode,
            'r' => $date_last_seen,
            's' => $date_last_borrowed,
            't' => $copy_number,
            'y' => $item_type,
        );

        ## Only emit subfields that actually carry data; undefined or empty
        ## values would otherwise become empty subfields in the 952.
        ## The three '0' defaults above always survive this filter, so the
        ## field is never created without subfields.
        my @koha_subfields;
        while (my ($code, $value) = splice(@candidate_subfields, 0, 2)) {
            next unless defined $value && length $value;
            push @koha_subfields, $code, $value;
        }

        ## Create the Koha holdings field and file it with the other 9xx tags
        my $koha_holdings = MARC::Field->new('952', '', '', @koha_subfields);
        $record->insert_grouped_field($koha_holdings);
        $itemcount++;

        ## Delete the Unicorn holdings field now that it has been converted
        $record->delete_field($unicorn_holding);
    }

    ## Add the record to the appropriate output file
    if ($marc_drop == 1) {
        print {$marc_dropped} $record->as_usmarc();
    }
    else {
        print {$marc_output} $record->as_usmarc();
    }
}

## Buffered write errors only surface at close, so check it.
close($marc_dropped) or die "Cannot close $dropped_file: $!";
close($marc_output)  or die "Cannot close $output_file: $!";

## Tell us how many records and items were processed
print "\n$recordcount records processed\n$itemcount items processed\n";
154+
155+
## Subroutines that clean up the subfields that are formatted differently between the two systems
156+
157+
## convert dates from M/D/YYYY to YYYY-MM-DD
158+
sub convert_date {
    ## Convert a Unicorn date (M/D/YYYY) to the YYYY-MM-DD form.
    ##
    ## Argument: the raw date string; may be undef when the subfield is absent.
    ## Returns:  the reformatted date with zero-padded month and day, or
    ##           nothing (undef in scalar context) when the input is undefined
    ##           or is not three slash-separated numbers (e.g. 'NEVER').
    my $date = $_[0];

    return unless defined $date;

    ## Anchored and digits-only, so stray text can never be turned into a
    ## plausible-looking but bogus date.
    if ($date =~ m{\A\s*([0-9]{1,2})/([0-9]{1,2})/([0-9]+)\s*\z}) {
        my ($month, $day, $year) = ($1, $2, $3);
        return sprintf('%s-%02d-%02d', $year, $month, $day);
    }

    return;
}
170+
171+
## Convert status from location field string in unicorn to an integer in koha
172+
## You might need to customize status to your library.
173+
## The values we need to get for a default koha install are:
174+
## 0 = Available, 1 = Lost, 2= Long Overdue (Lost), 3 = Lost and Paid For, 4 = Missing in Inventory, 5 = Missing in Hold Queue
175+
sub convert_status {
    ## Map a Unicorn location string to a Koha "lost" status code.
    ##
    ## Argument: the Unicorn location string; may be undef.
    ## Returns:  '1' (Lost), '2' (Long Overdue), '3' (Lost and Paid For),
    ##           '4' (Missing in Inventory), or '0' (Available) for every
    ##           other value, including an undefined one.
    my $location = $_[0];

    ## A missing subfield simply means "not lost"; answering here avoids
    ## comparing undef against the table keys.
    return '0' unless defined $location;

    ## Built inside the sub on purpose: the subs sit below the main program,
    ## so a file-level initialiser would not have run yet when we are called.
    my %status_for = (
        'LOST'       => '1',
        'LOST-CLAIM' => '1',
        'MATHER LAB' => '1',    # anything in MATHER LAB is treated as lost
        'LOST-ASSUM' => '2',    # TODO: ask kelly or rhonda about LOST-ASSUM
        'LOST-PAID'  => '3',
        'MISSING'    => '4',
    );

    return exists $status_for{$location} ? $status_for{$location} : '0';
}
184+
185+
## Convert some shelving location field strings in unicorn to Koha defaults
186+
## Since I have no way of knowing your shelving locations, you either need to customize this for your library or
187+
## define shelving location codes in Authorized Value category 'LOC' in your Koha installation
188+
## The values we need to get for a default koha install are:
189+
## AV = Audio Visual, CHILD = Children's Area, DISPLAY = On Display, FIC = Fiction,
190+
## GEN = General Stacks, NEW = New Materials Shelf, REF = Reference, STAFF = Staff Office
191+
192+
193+
## ANNA LEWIS|ARCHIVES|BACKLOG|BINDERY|CATALOGING|CHECKEDOUT|CIRC DESK|CURRENT|DIRECTOR|DISCARD|DRAMA|EBOOK|EDUROOM-C|EDUROOM-J|EDUROOM-L|ELECTRONIC|FOURTH FL.|HOLDS|ILL|INPROCESS|INTERNET|INTRANSIT|LOST|LOST-ASSUM|LOST-CLAIM|LOST-PAID|LOWITT|MATHER LAB|MICROFORM|MISSING|ON-EXHIBIT|ON-ORDER|OVERSIZE|PERIODICAL|REF-AV|REFERENCE|REPAIR|RESERVES|RESHELVING|REVIEW|ROOM 201|SCI DEPT|SECOND FL|SPARKS|SRVCLRNG|STACKS|STORAGE|SYSTEMSLIB|TECSERVICE|THOMAS LAB|UNKNOWN
194+
195+
#other nash locations
196+
#EBOOK,ELECTRONIC,|INTERNET
197+
#|STORAGE|BACKLOG|BINDERY|CIRC DESK|
198+
#|OVERSIZE|PERIODICAL|MICROFORM|ANNA LEWIS|ARCHIVES|CURRENT|LOWITT
199+
#|DISCARD|DRAMA|
200+
#|EDUROOM-C|EDUROOM-J|EDUROOM-L
201+
#|HOLDS|ILL|INTRANSIT
202+
#|ON-ORDER|
203+
#|REPAIR|RESERVES|
204+
#RESHELVING|REVIEW|
205+
#|SCI DEPT|SPARKS|SRVCLRNG|THOMAS LAB|MATHER LAB
206+
#|UNKNOWN|CHECKEDOUT
207+
208+
209+
210+
211+
212+
sub convert_location {
    ## Map a Unicorn current/home location pair to a Koha shelving location.
    ##
    ## Arguments: $_[0] current location (999$k at the call site),
    ##            $_[1] home location    (999$l at the call site);
    ##            either may be undef.
    ## Returns:   the Koha location code, or nothing (undef in scalar context)
    ##            when the item is dropped or the location has no mapping.
    ## Side effect: sets the file-level $marc_drop to 1 when the current
    ##            location is one whose items are not being imported.
    my $current_location = $_[0];
    my $home_location    = $_[1];

    ## Treat absent subfields as empty strings so the lookups below never
    ## operate on undef.
    ## NOTE(review): if your export omits the current location for items
    ## that are on the shelf, you may want to fall back to the home location
    ## here instead -- confirm against your data.
    $current_location = '' unless defined $current_location;
    $home_location    = '' unless defined $home_location;

    ## The tables are built inside the sub on purpose: the subs sit below the
    ## main program, so file-level initialisers would not have run yet.

    ## Items with these current locations aren't going in.
    my %is_dropped = map { $_ => 1 } (
        'ANNA LEWIS', 'BACKLOG',  'CIRC DESK',  'CURRENT',
        'DISCARD',    'EBOOK',    'ELECTRONIC', 'FOURTH FL.',
        'HOLDS',      'INTERNET', 'LOWITT',     'PERIODICAL',
        'REVIEW',     'SCI DEPT', 'SECOND FL',  'SRVCLRNG',
        'TECSERVICE', 'UNKNOWN',
    );

    ## Items with these (transient) current locations go in by home location.
    my %goes_by_home = map { $_ => 1 } (
        'CATALOGING', 'CHECKEDOUT', 'ILL',        'INPROCESS',
        'INTRANSIT',  'REPAIR',     'RESHELVING',
    );

    ## Unicorn location => Koha shelving location. The same table serves both
    ## the current-location and the home-location lookups.
    ## FYI, anything in MATHER LAB is lost; that is handled in convert_status.
    my %koha_location_for = (
        'THOMAS LAB' => 'AUSTIN-???',
        'BINDERY'    => 'BINDERY',
        'ON-EXHIBIT' => 'DISPLAY',
        'DRAMA'      => 'DAVIS-106C',
        'EDUROOM-C'  => 'NASH-305C',
        'EDUROOM-J'  => 'NASH-305J',
        'EDUROOM-L'  => 'NASH-305L',
        'STACKS'     => 'GEN',
        'ROOM 201'   => 'NASH-201',
        'MICROFORM'  => 'NASH-201',
        'DIRECTOR'   => 'NASH-202',
        'REF-AV'     => 'NASH-203',
        'REFERENCE'  => 'NASH-203',
        'ARCHIVES'   => 'NASH-301',
        'SYSTEMSLIB' => 'NASH-303',
        'ON-ORDER'   => 'ON-ORDER',
        'OVERSIZE'   => 'OVERSIZE',
        'RESERVES'   => 'NASH-203-RES',
        'SPARKS'     => 'SPARKS',
        'STORAGE'    => 'STORAGE',
    );

    if ($is_dropped{$current_location}) {
        $marc_drop = 1;
        return;    # no location for a dropped item
    }

    ## Pick which of the two locations decides the shelving code. Using the
    ## home location here also covers REF-AV / REFERENCE homes, which the
    ## old hand-written chain tested against the current location by mistake.
    my $deciding_location
        = $goes_by_home{$current_location} ? $home_location : $current_location;

    return unless exists $koha_location_for{$deciding_location};
    return $koha_location_for{$deciding_location};
}
302+
303+
304+
## Convert some Unicorn item type strings to Koha defaults
305+
## Since I have no way of knowing your item types, you either need to customize this for your library or
306+
## define item type codes in Item Types Administration in your Koha installation
307+
## For our library, it made sense to use a combination of item types and item categories (mapped to $collection_code)
308+
## The values we need to get for a default koha install are:
309+
## BK = Books, CF = Computer Files, CR = Continuing Resources, MP = Maps,
310+
## MU = Music, MX = Mixed Materials, REF = Reference, VM = Visual Materials
311+
312+
## other Nash item categories
313+
##|INDEX|KIT|MICROFICHE|MICROFILM|UNKNOWN
314+
315+
sub convert_item_type {
    ## Map a Unicorn item type (and optionally item category 1) to a Koha
    ## item type code.
    ##
    ## Arguments: $_[0] Unicorn item type; may be undef.
    ##            $_[1] Unicorn item category 1; optional, may be undef.
    ## Returns:   'BK', 'CF', 'CR', 'PERSONAL', 'MX', 'REF', 'MU' or 'VM' for
    ##            the recognised combinations; otherwise the item type
    ##            unchanged (so an undef type comes back as undef).
    my $type                    = $_[0];
    my $unicorn_item_category_1 = $_[1];

    return $type unless defined $type;

    my $category
        = defined $unicorn_item_category_1 ? $unicorn_item_category_1 : '';

    ## These types win regardless of the item category.
    ## Built inside the sub on purpose: the subs sit below the main program,
    ## so a file-level initialiser would not have run yet when we are called.
    my %koha_type_for = (
        'BOOK'       => 'BK',
        'ILL-BOOK'   => 'BK',
        'INDEX'      => 'BK',
        'JUVAWARDBK' => 'BK',
        'NEW-BOOK'   => 'BK',
        'COMPU-FILE' => 'CF',
        'ELECTRONIC' => 'CF',
        'INTERNET'   => 'CF',
        'MAGAZINE'   => 'CR',
        'MICROFORM'  => 'CR',
        'NEWSPAPER'  => 'CR',
        'PERSONAL'   => 'PERSONAL',    # Must be defined in koha
        'PERSONL-AV' => 'PERSONAL',
        'PERSONL-BK' => 'PERSONAL',
    );
    return $koha_type_for{$type} if exists $koha_type_for{$type};

    ## Kits are recognised by type or by category, and this check comes
    ## before the REF and AV rules, so a KIT category overrides those.
    return 'MX' if $type eq 'KIT' || $type eq 'GUIDE' || $category eq 'KIT';

    return 'REF'
        if $type eq 'REF-BOOK' || $type eq 'RESERVE' || $type eq 'XEROXCOPY';

    ## Audio-visual material is split into music and visual by category.
    if ($type eq 'AV' || $type eq 'AV-EQUIP') {
        my %av_type_for = (
            'CASSETTE'   => 'MU',
            'CD'         => 'MU',
            'PHONOGRAPH' => 'MU',
            'SCORE'      => 'MU',
            'DVD'        => 'VM',
            'FILMSTRIP'  => 'VM',
            'SLIDE'      => 'VM',
            'VIDEO'      => 'VM',
        );
        return $av_type_for{$category} if exists $av_type_for{$category};
        ## An AV item with a missing or unrecognised category keeps its
        ## Unicorn type, like any other unmapped type.
    }

    return $type;
}
332+
333+
sub convert_classification_source {
    ## Map a Unicorn classification scheme name to the Koha source code.
    ##
    ## Argument: the Unicorn scheme name; may be undef.
    ## Returns:  'lcc' for 'LC', 'ddc' for 'DEWEY', the empty string for
    ##           'AUTO', and any other value (including undef) unchanged.
    my $source = $_[0];

    ## Nothing to translate when the subfield is absent.
    return $source unless defined $source;

    ## We didn't have anything using 'anscr', 'sudocs', 'udc', or 'z', so I
    ## don't know how unicorn encodes them. Email suggestions to
    ## jsherman@usao.edu
    ## Built inside the sub on purpose: the subs sit below the main program,
    ## so a file-level initialiser would not have run yet when we are called.
    my %koha_source_for = (
        'LC'    => 'lcc',
        'DEWEY' => 'ddc',
        'AUTO'  => '',
    );

    return exists $koha_source_for{$source}
        ? $koha_source_for{$source}
        : $source;
}
342+
343+
344+
345+
sub convert_serial_holdings {
    ## Split a long serial holdings statement into fixed-size pieces.
    ##
    ## Arguments: $_[0] the unsplit holdings string; may be undef.
    ##            $_[1] optional maximum piece length; defaults to 79 when
    ##                  omitted or smaller than 1.
    ## Returns:   the list of pieces in order (empty for undef or '').
    ##            Every piece except possibly the last is exactly the
    ##            requested length.
    my $unsplit_362  = $_[0];
    my $chunk_length = $_[1];

    ## koha seems to have an 80 character limit on the h subfield of 952,
    ## hence the default of 79. int() plus the lower bound guarantee that
    ## each pass removes at least one character, so the loop always ends.
    $chunk_length
        = (defined $chunk_length && $chunk_length >= 1)
        ? int($chunk_length)
        : 79;

    my @chunks;
    return @chunks unless defined $unsplit_362;

    ## Four-argument substr removes the piece from our private copy of the
    ## string as it returns it.
    while (length $unsplit_362) {
        push @chunks, substr($unsplit_362, 0, $chunk_length, '');
    }

    return @chunks;
}

0 commit comments

Comments
 (0)