001    /*
002     * StructBuilder.java
003     * 
004     * Copyright (c) 2006, Imperial College.  All rights reserved.
005     *
006     * Redistribution and use in source and binary forms, with or without
007     * modification, are permitted provided that the following conditions are
008     * met:
009     *
010     * - Redistributions of source code must retain the above copyright
011     * notice, this list of conditions and the following disclaimer.
012     *
013     * - Redistributions in binary form must reproduce the above copyright
014     * notice, this list of conditions and the following disclaimer in the
015     * documentation and/or other materials provided with the distribution.
016     *
017     * - Neither the name of Imperial College nor the names of their
018     * contributors may be used to endorse or promote products derived from
019     * this software without specific prior written permission.
020     *
021     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
022     * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
023     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
024     * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
025     * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
026     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
027     * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
028     * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
029     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
030     * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
031     * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
032     * DAMAGE.
033     */
034    
035    package org.dspace.administer;
036    
037    import java.io.BufferedWriter;
038    import java.io.File;
039    import java.io.FileWriter;
040    import java.io.IOException;
041    import java.sql.SQLException;
042    import java.util.Enumeration;
043    import java.util.Hashtable;
044    
045    import javax.xml.parsers.DocumentBuilder;
046    import javax.xml.parsers.DocumentBuilderFactory;
047    import javax.xml.parsers.ParserConfigurationException;
048    import javax.xml.transform.TransformerException;
049    
050    import org.apache.commons.cli.CommandLine;
051    import org.apache.commons.cli.CommandLineParser;
052    import org.apache.commons.cli.Options;
053    import org.apache.commons.cli.PosixParser;
054    import org.apache.xpath.XPathAPI;
055    import org.dspace.authorize.AuthorizeException;
056    import org.dspace.content.Collection;
057    import org.dspace.content.Community;
058    import org.dspace.core.Context;
059    import org.dspace.eperson.EPerson;
060    import org.jdom.Element;
061    import org.jdom.output.XMLOutputter;
062    import org.w3c.dom.Document;
063    import org.w3c.dom.Node;
064    import org.w3c.dom.NodeList;
065    import org.xml.sax.SAXException;
066    
067    /**
068     * This class deals with importing community and collection structures from
069     * an XML file.
070     * 
071     * The XML file structure needs to be:
072     * 
073     * <import_structure>
074     *     <community>
075     *         <name>....</name>
076     *         <community>...</community>
077     *         <collection>
078     *             <name>....</name>
079     *         </collection>
080     *     </community>
081     * </import_structure>
082     * 
083     * it can be arbitrarily deep, and supports all the metadata elements
084     * that make up the community and collection metadata.  See the system
085     * documentation for more details
086     * 
087     * @author Richard Jones
088     *
089     */
090    
091    public class StructBuilder
092    {
093        /** the output xml document which will contain updated information about the 
094         * imported structure
095         */
096        private static org.jdom.Document xmlOutput = new org.jdom.Document(new Element("imported_structure"));
097        
098        /** a hashtable to hold metadata for the collection being worked on */
099        private static Hashtable collectionMap = new Hashtable();
100        
101        /** a hashtable to hold metadata for the community being worked on */
102        private static Hashtable communityMap = new Hashtable();
103        
104        /**
105         * Main method to be run from the command line to import a structure into
106         * DSpace
107         * 
108         * This is of the form:
109         * 
110         * StructBuilder -f [xml source] -e [administrator email] -o [output file]
111         * 
112         * The output file will contain exactly the same as the source xml document, but
113         * with the handle for each imported item added as an attribute.
114         */
115        public static void main(String[] argv) 
116            throws Exception
117        {
118            CommandLineParser parser = new PosixParser();
119    
120            Options options = new Options();
121    
122            options.addOption( "f", "file", true, "file");
123            options.addOption( "e", "eperson", true, "eperson");
124            options.addOption("o", "output", true, "output");
125            
126            CommandLine line = parser.parse( options, argv );
127            
128            String file = null;
129            String eperson = null;
130            String output = null;
131            
132            if (line.hasOption('f'))
133            {
134                file = line.getOptionValue('f');
135            }
136            
137            if (line.hasOption('e'))
138            {
139                eperson = line.getOptionValue('e');
140            }
141            
142            if (line.hasOption('o'))
143            {
144                output = line.getOptionValue('o');
145            }
146            
147            if (output == null || eperson == null || file == null)
148            {
149                usage();
150                System.exit(0);
151            }
152            
153            // create a context
154            Context context = new Context();
155            
156            // set the context
157            context.setCurrentUser(EPerson.findByEmail(context, eperson));
158     
159            // load the XML
160            Document document = loadXML(file);
161            
162            // run the preliminary validation, to be sure that the the XML document
163            // is properly structured
164            validate(document);
165            
166            // load the mappings into the member variable hashmaps
167            communityMap.put("name", "name");
168            communityMap.put("description", "short_description");
169            communityMap.put("intro", "introductory_text");
170            communityMap.put("copyright", "copyright_text");
171            communityMap.put("sidebar", "side_bar_text");
172            
173            collectionMap.put("name", "name");
174            collectionMap.put("description", "short_description");
175            collectionMap.put("intro", "introductory_text");
176            collectionMap.put("copyright", "copyright_text");
177            collectionMap.put("sidebar", "side_bar_text");
178            collectionMap.put("license", "license");
179            collectionMap.put("provenance", "provenance_description");
180            
181            // get the top level community list
182            NodeList first = XPathAPI.selectNodeList(document, "/import_structure/community");
183            
184            // run the import starting with the top level communities
185            Element[] elements = handleCommunities(context, first, null);
186            
187            // generate the output
188            Element root = xmlOutput.getRootElement();
189            for (int i = 0; i < elements.length; i++)
190            {
191                root.addContent(elements[i]);
192            }
193            
194            // finally write the string into the output file
195            try 
196            {
197                BufferedWriter out = new BufferedWriter(new FileWriter(output));
198                out.write(new XMLOutputter().outputString(xmlOutput));
199                out.close();
200            } 
201            catch (IOException e) 
202            {
203                System.out.println("Unable to write to output file " + output);
204                System.exit(0);
205            }
206            
207            context.complete();
208        }
209        
210        /**
211         * Output the usage information
212         */
213        private static void usage()
214        {
215            System.out.println("Usage: java StructBuilder -f <source XML file> -o <output file> -e <eperson email>");
216            System.out.println("Communitities will be created from the top level, and a map of communities to handles will be returned in the output file");
217            return;
218        }
219        
220        /**
221         * Validate the XML document.  This method does not return, but if validation
222         * fails it generates an error and ceases execution
223         * 
224         * @param   document        the XML document object
225         * @throws TransformerException
226         * 
227         */
228        private static void validate(org.w3c.dom.Document document)
229            throws TransformerException
230        {
231            StringBuffer err = new StringBuffer();
232            boolean trip = false;
233            
234            err.append("The following errors were encountered parsing the source XML\n");
235            err.append("No changes have been made to the DSpace instance\n\n");
236            
237            NodeList first = XPathAPI.selectNodeList(document, "/import_structure/community");
238            if (first.getLength() == 0)
239            {
240                err.append("-There are no top level communities in the source document");
241                System.out.println(err.toString());
242                System.exit(0);
243            }
244            
245            String errs = validateCommunities(first, 1);
246            if (errs != null)
247            {
248                err.append(errs);
249                trip = true;
250            }
251            
252            if (trip)
253            {
254                System.out.println(err.toString());
255                System.exit(0);
256            }
257        }
258        
259        /**
260         * Validate the communities section of the XML document.  This returns a string
261         * containing any errors encountered, or null if there were no errors
262         * 
263         * @param communities the NodeList of communities to validate
264         * @param level the level in the XML document that we are at, for the purposes
265         *                  of error reporting
266         * 
267         * @return the errors that need to be generated by the calling method, or null if
268         *                  no errors.
269         */
270        private static String validateCommunities(NodeList communities, int level)
271            throws TransformerException
272        {
273            StringBuffer err = new StringBuffer();
274            boolean trip = false;
275            String errs = null;
276            
277            for (int i = 0; i < communities.getLength(); i++)
278            {
279                Node n = communities.item(i);
280                    NodeList name = XPathAPI.selectNodeList(n, "name");
281                    if (name.getLength() != 1)
282                    {
283                        String pos = Integer.toString(i + 1);
284                        err.append("-The level " + level + " community in position " + pos);
285                        err.append(" does not contain exactly one name field\n");
286                        trip = true;
287                    }
288                    
289                    // validate sub communities
290                    NodeList subCommunities = XPathAPI.selectNodeList(n, "community");
291                    String comErrs = validateCommunities(subCommunities, level + 1);
292                    if (comErrs != null)
293                    {
294                        err.append(comErrs);
295                        trip = true;
296                    }
297                    
298                    // validate collections
299                    NodeList collections = XPathAPI.selectNodeList(n, "collection");
300                    String colErrs = validateCollections(collections, level + 1);
301                    if (colErrs != null)
302                    {
303                        err.append(colErrs);
304                        trip = true;
305                    }
306            }
307            
308            if (trip)
309            {
310                errs = err.toString();
311            }
312            
313            return errs;
314        }
315        
316        /**
317         * validate the collection section of the XML document.  This generates a
318         * string containing any errors encountered, or returns null if no errors
319         * 
320         * @param collections a NodeList of collections to validate
321         * @param level the level in the XML document for the purposes of error reporting
322         * 
323         * @return the errors to be generated by the calling method, or null if none
324         */
325        private static String validateCollections(NodeList collections, int level)
326            throws TransformerException
327        {
328            StringBuffer err = new StringBuffer();
329            boolean trip = false;
330            String errs = null;
331            
332            for (int i = 0; i < collections.getLength(); i++)
333            {
334                Node n = collections.item(i);
335                    NodeList name = XPathAPI.selectNodeList(n, "name");
336                    if (name.getLength() != 1)
337                    {
338                        String pos = Integer.toString(i + 1);
339                        err.append("-The level " + level + " collection in position " + pos);
340                        err.append(" does not contain exactly one name field\n");
341                        trip = true;
342                    }
343            }
344            
345            if (trip)
346            {
347                errs = err.toString();
348            }
349            
350            return errs;
351        }
352        
353        /**
354         * Load in the XML from file.
355         * 
356         * @param filename
357         *            the filename to load from
358         * 
359         * @return the DOM representation of the XML file
360         */
361        private static org.w3c.dom.Document loadXML(String filename) 
362            throws IOException, ParserConfigurationException, SAXException
363        {
364            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
365                    .newDocumentBuilder();
366    
367            org.w3c.dom.Document document = builder.parse(new File(filename));
368            
369            return document;
370        }
371        
372        /**
373         * Return the String value of a Node
374         * 
375         * @param node the node from which we want to extract the string value
376         * 
377         * @return the string value of the node
378         */
379        public static String getStringValue(Node node)
380        {
381            String value = node.getNodeValue();
382    
383            if (node.hasChildNodes())
384            {
385                Node first = node.getFirstChild();
386    
387                if (first.getNodeType() == Node.TEXT_NODE)
388                {
389                    return first.getNodeValue().trim();
390                }
391            }
392    
393            return value;
394        }
395        
396        /**
397         * Take a node list of communities and build the structure from them, delegating
398         * to the relevant methods in this class for sub-communities and collections
399         * 
400         * @param context the context of the request
401         * @param communities a nodelist of communities to create along with their subjstructures
402         * @param parent the parent community of the nodelist of communities to create
403         * 
404         * @return an element array containing additional information regarding the 
405         *                  created communities (e.g. the handles they have been assigned)
406         */
407        private static Element[] handleCommunities(Context context, NodeList communities, Community parent)
408            throws TransformerException, SQLException, Exception
409        {
410            Element[] elements = new Element[communities.getLength()];
411            
412            for (int i = 0; i < communities.getLength(); i++)
413            {
414                Community community;
415                Element element = new Element("community");
416                
417                // create the community or sub community
418                if (parent != null)
419                {
420                    community = parent.createSubcommunity();
421                }
422                else
423                {
424                    community = Community.create(null, context);
425                }
426                
427                // default the short description to be an empty string
428                community.setMetadata("short_description", " ");
429                
430                // now update the metadata
431                Node tn = communities.item(i);
432                Enumeration keys = communityMap.keys();
433                while (keys.hasMoreElements())
434                {
435                    Node node = null;
436                    String key = (String) keys.nextElement();
437                    NodeList nl = XPathAPI.selectNodeList(tn, key);
438                    if (nl.getLength() == 1)
439                    {
440                        node = nl.item(0);
441                        community.setMetadata((String) communityMap.get(key), getStringValue(node));
442                    }
443                }
444                
445                // FIXME: at the moment, if the community already exists by name
446                // then this will throw a PSQLException on a duplicate key
447                // violation
448                // Ideally we'd skip this row and continue to create sub
449                // communities
450                // and so forth where they don't exist, but it's proving
451                // difficult
452                // to isolate the community that already exists without hitting
453                // the database directly.
454                community.update();
455                
456                // build the element with the handle that identifies the new
457                // community
458                // along with all the information that we imported here
459                // This looks like a lot of repetition of getting information
460                // from above
461                // but it's here to keep it separate from the create process in
462                // case
463                // we want to move it or make it switchable later
464                element.setAttribute("identifier", community.getHandle());
465                
466                Element nameElement = new Element("name");
467                nameElement.setText(community.getMetadata("name"));
468                element.addContent(nameElement);
469                
470                if (community.getMetadata("short_description") != null)
471                {
472                    Element descriptionElement = new Element("description");
473                    descriptionElement.setText(community.getMetadata("short_description"));
474                    element.addContent(descriptionElement);
475                }
476                
477                if (community.getMetadata("introductory_text") != null)
478                {
479                    Element introElement = new Element("intro");
480                    introElement.setText(community.getMetadata("introductory_text"));
481                    element.addContent(introElement);
482                }
483                
484                if (community.getMetadata("copyright_text") != null)
485                {
486                    Element copyrightElement = new Element("copyright");
487                    copyrightElement.setText(community.getMetadata("copyright_text"));
488                    element.addContent(copyrightElement);
489                }
490                
491                if (community.getMetadata("side_bar_text") != null)
492                {
493                    Element sidebarElement = new Element("sidebar");
494                    sidebarElement.setText(community.getMetadata("side_bar_text"));
495                    element.addContent(sidebarElement);
496                }
497                
498                // handle sub communities
499                NodeList subCommunities = XPathAPI.selectNodeList(tn, "community");
500                Element[] subCommunityElements = handleCommunities(context, subCommunities, community);
501                
502                // handle collections
503                NodeList collections = XPathAPI.selectNodeList(tn, "collection");
504                Element[] collectionElements = handleCollections(context, collections, community);
505                
506                int j;
507                for (j = 0; j < subCommunityElements.length; j++)
508                {
509                    element.addContent(subCommunityElements[j]);
510                }
511                for (j = 0; j < collectionElements.length; j++)
512                {
513                    element.addContent(collectionElements[j]);
514                }
515                
516                elements[i] = element;
517            }
518            
519            return elements;
520        }
521        
522        /**
523         *  Take a node list of collections and create the structure from them
524         * 
525         * @param context the context of the request
526         * @param collections the node list of collections to be created
527         * @param parent the parent community to whom the collections belong
528         * 
529         * @return an Element array containing additional information about the
530         *                  created collections (e.g. the handle)
531         */
532        private static Element[] handleCollections(Context context, NodeList collections, Community parent)
533            throws TransformerException, SQLException, AuthorizeException, IOException, Exception
534        {
535            Element[] elements = new Element[collections.getLength()];
536            
537            for (int i = 0; i < collections.getLength(); i++)
538            {
539                Element element = new Element("collection");
540                Collection collection = parent.createCollection();
541                
542                // default the short description to the empty string
543                collection.setMetadata("short_description", " ");
544                
545                // import the rest of the metadata
546                Node tn = collections.item(i);
547                Enumeration keys = collectionMap.keys();
548                while (keys.hasMoreElements())
549                {
550                    Node node = null;
551                    String key = (String) keys.nextElement();
552                    NodeList nl = XPathAPI.selectNodeList(tn, key);
553                    if (nl.getLength() == 1)
554                    {
555                        node = nl.item(0);
556                        collection.setMetadata((String) collectionMap.get(key), getStringValue(node));
557                    }
558                }
559                
560                collection.update();
561                
562                element.setAttribute("identifier", collection.getHandle());
563                
564                Element nameElement = new Element("name");
565                nameElement.setText(collection.getMetadata("name"));
566                element.addContent(nameElement);
567                
568                if (collection.getMetadata("short_description") != null)
569                {
570                    Element descriptionElement = new Element("description");
571                    descriptionElement.setText(collection.getMetadata("short_description"));
572                    element.addContent(descriptionElement);
573                }
574                
575                if (collection.getMetadata("introductory_text") != null)
576                {
577                    Element introElement = new Element("intro");
578                    introElement.setText(collection.getMetadata("introductory_text"));
579                    element.addContent(introElement);
580                }
581                
582                if (collection.getMetadata("copyright_text") != null)
583                {
584                    Element copyrightElement = new Element("copyright");
585                    copyrightElement.setText(collection.getMetadata("copyright_text"));
586                    element.addContent(copyrightElement);
587                }
588                
589                if (collection.getMetadata("side_bar_text") != null)
590                {
591                    Element sidebarElement = new Element("sidebar");
592                    sidebarElement.setText(collection.getMetadata("side_bar_text"));
593                    element.addContent(sidebarElement);
594                }
595                
596                if (collection.getMetadata("license") != null)
597                {
598                    Element sidebarElement = new Element("license");
599                    sidebarElement.setText(collection.getMetadata("license"));
600                    element.addContent(sidebarElement);
601                }
602                
603                if (collection.getMetadata("provenance_description") != null)
604                {
605                    Element sidebarElement = new Element("provenance");
606                    sidebarElement.setText(collection.getMetadata("provenance_description"));
607                    element.addContent(sidebarElement);
608                }
609                
610                elements[i] = element;
611            }
612            
613            return elements;
614        }
615        
616    }