001    /*
002     * DCInputsReader.java
003     *
004     * Version: $Revision: 4365 $
005     *
006     * Date: $Date: 2009-10-05 23:52:42 +0000 (Mon, 05 Oct 2009) $
007     *
008     * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009     * Institute of Technology.  All rights reserved.
010     *
011     * Redistribution and use in source and binary forms, with or without
012     * modification, are permitted provided that the following conditions are
013     * met:
014     *
015     * - Redistributions of source code must retain the above copyright
016     * notice, this list of conditions and the following disclaimer.
017     *
018     * - Redistributions in binary form must reproduce the above copyright
019     * notice, this list of conditions and the following disclaimer in the
020     * documentation and/or other materials provided with the distribution.
021     *
022     * - Neither the name of the Hewlett-Packard Company nor the name of the
023     * Massachusetts Institute of Technology nor the names of their
024     * contributors may be used to endorse or promote products derived from
025     * this software without specific prior written permission.
026     *
027     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028     * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030     * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031     * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033     * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034     * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036     * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037     * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038     * DAMAGE.
039     */
040    
041    package org.dspace.app.util;
042    
043    import java.io.File;
044    import java.util.List;
045    import java.util.Vector;
046    import java.util.HashMap;
047    import java.util.Iterator;
048    import org.xml.sax.SAXException;
049    import org.w3c.dom.*;
050    import javax.xml.parsers.*;
051    
052    import org.apache.log4j.Logger;
053    
054    import org.dspace.content.MetadataSchema;
055    import org.dspace.core.ConfigurationManager;
056    
057    /**
058     * Submission form generator for DSpace. Reads and parses the installation
059     * form definitions file, input-forms.xml, from the configuration directory.
060     * A forms definition details the page and field layout of the metadata
061     * collection pages used by the submission process. Each forms definition
062     * starts with a unique name that gets associated with that form set.
063     *
064     * The file also specifies which collections use which form sets. At a
065     * minimum, the definitions file must define a default mapping from the
066     * placeholder collection #0 to the distinguished form 'default'. Any
067     * collections that use a custom form set are listed paired with the name
068     * of the form set they use.
069     *
070     * The definitions file also may contain sets of value pairs. Each value pair
071     * will contain one string that the user reads, and a paired string that will
072     * supply the value stored in the database if its sibling display value gets
073     * selected from a choice list.
074     *
075     * @author  Brian S. Hughes
076     * @version $Revision: 4365 $
077     */
078    
079    public class DCInputsReader
080    {
081        /**
082         * The ID of the default collection. Will never be the ID of a named
083         * collection
084         */
085        public static final String DEFAULT_COLLECTION = "default";
086    
087        /** Name of the form definition XML file  */
088        static final String FORM_DEF_FILE = "input-forms.xml";
089    
090        /** Keyname for storing dropdown value-pair set name */
091        static final String PAIR_TYPE_NAME = "value-pairs-name";
092    
093        /** log4j logger */
094        private static Logger log = Logger.getLogger(DCInputsReader.class);
095    
096        /** The fully qualified pathname of the form definition XML file */
097        private String defsFile = ConfigurationManager.getProperty("dspace.dir")
098                + File.separator + "config" + File.separator + FORM_DEF_FILE;
099    
100        /**
101         * Reference to the collections to forms map, computed from the forms
102         * definition file
103         */
104        private HashMap whichForms = null;
105    
106        /**
107         * Reference to the forms definitions map, computed from the forms
108         * definition file
109         */
110        private HashMap formDefns  = null;
111    
112        /**
113         * Reference to the forms which allow, disallow or mandate files to be
114         * uploaded.
115         */
116        private HashMap formFileUploadDefns = null;
117    
118        /**
119         * Reference to the value-pairs map, computed from the forms defition file
120         */
121        private HashMap valuePairs = null;    // Holds display/storage pairs
122        
123        /**
124         * Mini-cache of last DCInputSet requested. If submissions are not typically
125         * form-interleaved, there will be a modest win.
126         */
127        private DCInputSet lastInputSet = null;
128    
129        /**
130         * Parse an XML encoded submission forms template file, and create a hashmap
131         * containing all the form information. This hashmap will contain three top
132         * level structures: a map between collections and forms, the definition for
133         * each page of each form, and lists of pairs of values that populate
134         * selection boxes.
135         */
136    
137        public DCInputsReader()
138             throws DCInputsReaderException
139        {
140            buildInputs(defsFile);
141        }
142    
143    
144        public DCInputsReader(String fileName)
145             throws DCInputsReaderException
146        {
147            buildInputs(fileName);
148        }
149    
150    
151        private void buildInputs(String fileName)
152             throws DCInputsReaderException
153        {
154            whichForms = new HashMap();
155            formDefns  = new HashMap();
156            valuePairs = new HashMap();
157    
158            String uri = "file:" + new File(fileName).getAbsolutePath();
159    
160            try
161            {
162                    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
163                    factory.setValidating(false);
164                    factory.setIgnoringComments(true);
165                    factory.setIgnoringElementContentWhitespace(true);
166                    
167                    DocumentBuilder db = factory.newDocumentBuilder();
168                    Document doc = db.parse(uri);
169                    doNodes(doc);
170                    checkValues();
171            }
172            catch (FactoryConfigurationError fe)
173            {
174                    throw new DCInputsReaderException("Cannot create Submission form parser",fe);
175            }
176            catch (Exception e)
177            {
178                    throw new DCInputsReaderException("Error creating submission forms: "+e);
179            }
180        }
181       
182        public Iterator getPairsNameIterator()
183        {
184            return valuePairs.keySet().iterator();
185        }
186    
187        public List getPairs(String name)
188        {
189            return (Vector)valuePairs.get(name);
190        }
191    
192        /**
193         * Returns the set of DC inputs used for a particular collection, or the
194         * default set if no inputs defined for the collection
195         *
196         * @param collectionHandle
197         *            collection's unique Handle
198         * @return DC input set
199         * @throws DCInputsReaderException
200         *             if no default set defined
201         */
202        public DCInputSet getInputs(String collectionHandle)
203                    throws DCInputsReaderException
204        {
205            String formName = (String)whichForms.get(collectionHandle);
206            if (formName == null)
207            {
208                    formName = (String)whichForms.get(DEFAULT_COLLECTION);
209            }
210            if (formName == null)
211            {
212                    throw new DCInputsReaderException("No form designated as default");
213            }
214            // check mini-cache, and return if match
215            if ( lastInputSet != null && lastInputSet.getFormName().equals( formName ) )
216            {
217                    return lastInputSet;
218            }
219            // cache miss - construct new DCInputSet
220            Vector pages = (Vector)formDefns.get(formName);
221            if ( pages == null )
222            {
223                    throw new DCInputsReaderException("Missing the " + formName  + " form");
224            }
225            lastInputSet = new DCInputSet(formName, pages, valuePairs);
226            return lastInputSet;
227        }
228        
229        /**
230         * Return the number of pages the inputs span for a desginated collection
231         * @param  collectionHandle   collection's unique Handle
232         * @return number of pages of input
233         * @throws DCInputsReaderException if no default set defined
234         */
235        public int getNumberInputPages(String collectionHandle)
236            throws DCInputsReaderException
237        {
238            return getInputs(collectionHandle).getNumberPages();
239        }
240        
241        /**
242         * Process the top level child nodes in the passed top-level node. These
243         * should correspond to the collection-form maps, the form definitions, and
244         * the display/storage word pairs.
245         */
246        private void doNodes(Node n)
247                    throws SAXException, DCInputsReaderException
248        {
249            if (n == null)
250            {
251                    return;
252            }
253            Node e = getElement(n);
254            NodeList nl = e.getChildNodes();
255            int len = nl.getLength();
256            boolean foundMap  = false;
257            boolean foundDefs = false;
258            for (int i = 0; i < len; i++)
259            {
260                    Node nd = nl.item(i);
261                    if ((nd == null) || isEmptyTextNode(nd))
262                    {
263                            continue;
264                    }
265                    String tagName = nd.getNodeName();
266                    if (tagName.equals("form-map"))
267                    {
268                            processMap(nd);
269                            foundMap = true;
270                    }
271                    else if (tagName.equals("form-definitions"))
272                    {
273                            processDefinition(nd);
274                            foundDefs = true;
275                    }
276                    else if (tagName.equals("form-value-pairs"))
277                    {
278                            processValuePairs(nd);
279                    }
280                    // Ignore unknown nodes
281            }
282            if (!foundMap)
283            {
284                    throw new DCInputsReaderException("No collection to form map found");
285            }
286            if (!foundDefs)
287            {
288                    throw new DCInputsReaderException("No form definition found");
289            }
290        }
291    
292        /**
293         * Process the form-map section of the XML file.
294         * Each element looks like:
295         *   <name-map collection-handle="hdl" form-name="name" />
296         * Extract the collection handle and form name, put name in hashmap keyed
297         * by the collection handle.
298         */
299        private void processMap(Node e)
300            throws SAXException
301        {
302            NodeList nl = e.getChildNodes();
303            int len = nl.getLength();
304            for (int i = 0; i < len; i++)
305            {
306                    Node nd = nl.item(i);
307                    if (nd.getNodeName().equals("name-map"))
308                    {
309                            String id = getAttribute(nd, "collection-handle");
310                            String value = getAttribute(nd, "form-name");
311                            String content = getValue(nd);
312                            if (id == null)
313                            {
314                                    throw new SAXException("name-map element is missing collection-handle attribute");
315                            }
316                            if (value == null)
317                            {
318                                    throw new SAXException("name-map element is missing form-name attribute");
319                            }
320                            if (content != null && content.length() > 0)
321                            {
322                                    throw new SAXException("name-map element has content, it should be empty.");
323                            }
324                            whichForms.put(id, value);
325                    }  // ignore any child node that isn't a "name-map"
326            }
327        }
328    
329        /**
330         * Process the form-definitions section of the XML file. Each element is
331         * formed thusly: <form name="formname">...pages...</form> Each pages
332         * subsection is formed: <page number="#"> ...fields... </page> Each field
333         * is formed from: dc-element, dc-qualifier, label, hint, input-type name,
334         * required text, and repeatable flag.
335         */
336        private void processDefinition(Node e)
337            throws SAXException, DCInputsReaderException
338        {
339            int numForms = 0;
340            NodeList nl = e.getChildNodes();
341            int len = nl.getLength();
342            for (int i = 0; i < len; i++)
343            {
344                    Node nd = nl.item(i);
345                    // process each form definition
346                    if (nd.getNodeName().equals("form"))
347                    {
348                            numForms++;
349                            String formName = getAttribute(nd, "name");
350                            if (formName == null)
351                            {
352                                    throw new SAXException("form element has no name attribute");
353                            }
354                            Vector pages = new Vector(); // the form contains pages
355                            formDefns.put(formName, pages);
356                            NodeList pl = nd.getChildNodes();
357                            int lenpg = pl.getLength();
358                            for (int j = 0; j < lenpg; j++)
359                            {
360                                    Node npg = pl.item(j);
361                                    // process each page definition
362                                    if (npg.getNodeName().equals("page"))
363                                    {
364                                            String pgNum = getAttribute(npg, "number");
365                                            if (pgNum == null)
366                                            {
367                                                    throw new SAXException("Form " + formName + " has no identified pages");
368                                            }
369                                            Vector page = new Vector();
370                                            pages.add(page);
371                                            NodeList flds = npg.getChildNodes();
372                                            int lenflds = flds.getLength();
373                                            for (int k = 0; k < lenflds; k++)
374                                            {
375                                                    Node nfld = flds.item(k);
376                                                    if ( nfld.getNodeName().equals("field") )
377                                                    {
378                                                            // process each field definition
379                                                            HashMap field = new HashMap();
380                                                            page.add(field);
381                                                            processPageParts(formName, pgNum, nfld, field);
382                                                            String error = checkForDups(formName, field, pages);
383                                                            if (error != null)
384                                                            {
385                                                                    throw new SAXException(error);
386                                                            }
387                                                    }
388                                            }
389                                    } // ignore any child that is not a 'page'
390                            }
391                            // sanity check number of pages
392                            if (pages.size() < 1)
393                            {
394                                    throw new DCInputsReaderException("Form " + formName + " has no pages");
395                            }
396                    }
397            }
398            if (numForms == 0)
399            {
400                    throw new DCInputsReaderException("No form definition found");
401            }
402        }
403    
404        /**
405         * Process parts of a field
406         * At the end, make sure that input-types 'qualdrop_value' and
407         * 'twobox' are marked repeatable. Complain if dc-element, label,
408         * or input-type are missing.
409         */
410        private void processPageParts(String formName, String page, Node n, HashMap field)
411            throws SAXException
412        {
413            NodeList nl = n.getChildNodes();
414            int len = nl.getLength();
415            for (int i = 0; i < len; i++)
416            {
417                    Node nd = nl.item(i);
418                    if ( ! isEmptyTextNode(nd) )
419                    {
420                            String tagName = nd.getNodeName();
421                            String value   = getValue(nd);
422                            field.put(tagName, value);
423                            if (tagName.equals("input-type"))
424                            {
425                        if (value.equals("dropdown")
426                                || value.equals("qualdrop_value")
427                                || value.equals("list"))
428                                    {
429                                            String pairTypeName = getAttribute(nd, PAIR_TYPE_NAME);
430                                            if (pairTypeName == null)
431                                            {
432                                                    throw new SAXException("Form " + formName + ", field " +
433                                                                                                    field.get("dc-element") +
434                                                                                                            "." + field.get("dc-qualifier") +
435                                                                                                    " has no name attribute");
436                                            }
437                                            else
438                                            {
439                                                    field.put(PAIR_TYPE_NAME, pairTypeName);
440                                            }
441                                    }
442                            }
443                            else if (tagName.equals("vocabulary"))
444                            {
445                                    String closedVocabularyString = getAttribute(nd, "closed");
446                                field.put("closedVocabulary", closedVocabularyString);
447                            }
448                    }
449            }
450            String missing = null;
451            if (field.get("dc-element") == null)
452            {
453                    missing = "dc-element";
454            }
455            if (field.get("label") == null)
456            {
457                    missing = "label";
458            }
459            if (field.get("input-type") == null)
460            {
461                    missing = "input-type";
462            }
463            if ( missing != null )
464            {
465                    String msg = "Required field " + missing + " missing on page " + page + " of form " + formName;
466                    throw new SAXException(msg);
467            }
468            String type = (String)field.get("input-type");
469            if (type.equals("twobox") || type.equals("qualdrop_value"))
470            {
471                    String rpt = (String)field.get("repeatable");
472                    if ((rpt == null) ||
473                                    ((!rpt.equalsIgnoreCase("yes")) &&
474                                                    (!rpt.equalsIgnoreCase("true"))))
475                    {
476                            String msg = "The field \'"+field.get("label")+"\' must be repeatable";
477                            throw new SAXException(msg);
478                    }
479            }
480        }
481    
482        /**
483         * Check that this is the only field with the name dc-element.dc-qualifier
484         * If there is a duplicate, return an error message, else return null;
485         */
486        private String checkForDups(String formName, HashMap field, Vector pages)
487        {
488            int matches = 0;
489            String err = null;
490            String schema = (String)field.get("dc-schema");
491            String elem = (String)field.get("dc-element");
492            String qual = (String)field.get("dc-qualifier");
493            if ((schema == null) || (schema.equals("")))
494            {
495                schema = MetadataSchema.DC_SCHEMA;
496            }
497            String schemaTest;
498            
499            for (int i = 0; i < pages.size(); i++)
500            {
501                Vector pg = (Vector)pages.get(i);
502                for (int j = 0; j < pg.size(); j++)
503                {
504                    HashMap fld = (HashMap)pg.get(j);
505                    if ((fld.get("dc-schema") == null) ||
506                        (((String)fld.get("dc-schema")).equals("")))
507                    {
508                        schemaTest = MetadataSchema.DC_SCHEMA;
509                    }
510                    else
511                    {
512                        schemaTest = (String)fld.get("dc-schema");
513                    }
514                    
515                    // Are the schema and element the same? If so, check the qualifier
516                    if ((((String)fld.get("dc-element")).equals(elem)) &&
517                        (schemaTest.equals(schema)))
518                    {
519                        String ql = (String)fld.get("dc-qualifier");
520                        if (qual != null)
521                        {
522                            if ((ql != null) && ql.equals(qual))
523                            {
524                                matches++;
525                            }
526                        }
527                        else if (ql == null)
528                        {
529                            matches++;
530                        }
531                    }
532                }
533            }
534            if (matches > 1)
535            {
536                err = "Duplicate field " + schema + "." + elem + "." + qual + " detected in form " + formName;
537            }
538            
539            return err;
540        }
541    
542    
543        /**
544         * Process the form-value-pairs section of the XML file.
545         *  Each element is formed thusly:
546         *      <value-pairs name="..." dc-term="...">
547         *          <pair>
548         *            <display>displayed name-</display>
549         *            <storage>stored name</storage>
550         *          </pair>
551         * For each value-pairs element, create a new vector, and extract all
552         * the pairs contained within it. Put the display and storage values,
553         * respectively, in the next slots in the vector. Store the vector
554         * in the passed in hashmap.
555         */
556        private void processValuePairs(Node e)
557                    throws SAXException
558        {
559            NodeList nl = e.getChildNodes();
560            int len = nl.getLength();
561            for (int i = 0; i < len; i++)
562            {
563                    Node nd = nl.item(i);
564                        String tagName = nd.getNodeName();
565    
566                        // process each value-pairs set
567                        if (tagName.equals("value-pairs"))
568                        {
569                            String pairsName = getAttribute(nd, PAIR_TYPE_NAME);
570                            String dcTerm = getAttribute(nd, "dc-term");
571                            if (pairsName == null)
572                            {
573                                    String errString =
574                                            "Missing name attribute for value-pairs for DC term " + dcTerm;
575                                    throw new SAXException(errString);
576    
577                            }
578                            Vector pairs = new Vector();
579                            valuePairs.put(pairsName, pairs);
580                            NodeList cl = nd.getChildNodes();
581                            int lench = cl.getLength();
582                            for (int j = 0; j < lench; j++)
583                            {
584                                    Node nch = cl.item(j);
585                                    String display = null;
586                                    String storage = null;
587    
588                                    if (nch.getNodeName().equals("pair"))
589                                    {
590                                            NodeList pl = nch.getChildNodes();
591                                            int plen = pl.getLength();
592                                            for (int k = 0; k < plen; k++)
593                                            {
594                                                    Node vn= pl.item(k);
595                                                    String vName = vn.getNodeName();
596                                                    if (vName.equals("displayed-value"))
597                                                    {
598                                                            display = getValue(vn);
599                                                    }
600                                                    else if (vName.equals("stored-value"))
601                                                    {
602                                                            storage = getValue(vn);
603                                                            if (storage == null)
604                                                            {
605                                                                    storage = "";
606                                                            }
607                                                    } // ignore any children that aren't 'display' or 'storage'
608                                            }
609                                            pairs.add(display);
610                                            pairs.add(storage);
611                                    } // ignore any children that aren't a 'pair'
612                            }
613                        } // ignore any children that aren't a 'value-pair'
614            }
615        }
616    
617    
618        /**
619         * Check that all referenced value-pairs are present
620         * and field is consistent
621         *
622         * Throws DCInputsReaderException if detects a missing value-pair.
623         */
624    
625        private void checkValues()
626                    throws DCInputsReaderException
627        {
628            // Step through every field of every page of every form
629            Iterator ki = formDefns.keySet().iterator();
630            while (ki.hasNext())
631            {
632                    String idName = (String)ki.next();
633                    Vector pages = (Vector)formDefns.get(idName);
634                    for (int i = 0; i < pages.size(); i++)
635                    {
636                            Vector page = (Vector)pages.get(i);
637                            for (int j = 0; j < page.size(); j++)
638                            {
639                                    HashMap fld = (HashMap)page.get(j);
640                                    // verify reference in certain input types
641                                    String type = (String)fld.get("input-type");
642                        if (type.equals("dropdown")
643                                || type.equals("qualdrop_value")
644                                || type.equals("list"))
645                                    {
646                                            String pairsName = (String)fld.get(PAIR_TYPE_NAME);
647                                            Vector v = (Vector)valuePairs.get(pairsName);
648                                            if (v == null)
649                                            {
650                                                    String errString = "Cannot find value pairs for " + pairsName;
651                                                    throw new DCInputsReaderException(errString);
652                                            }
653                                    }
654                                    // if visibility restricted, make sure field is not required
655                                    String visibility = (String)fld.get("visibility");
656                                    if (visibility != null && visibility.length() > 0 )
657                                    {
658                                            String required = (String)fld.get("required");
659                                            if (required != null && required.length() > 0)
660                                            {
661                                                    String errString = "Field '" + (String)fld.get("label") +
662                                                                            "' is required but invisible";
663                                                    throw new DCInputsReaderException(errString);
664                                            }
665                                    }
666                            }
667                    }
668            }
669        }
670        
671        private Node getElement(Node nd)
672        {
673            NodeList nl = nd.getChildNodes();
674            int len = nl.getLength();
675            for (int i = 0; i < len; i++)
676            {
677                Node n = nl.item(i);
678                if (n.getNodeType() == Node.ELEMENT_NODE)
679                {
680                    return n;
681                }
682            }
683            return null;
684         }
685    
686        private boolean isEmptyTextNode(Node nd)
687        {
688            boolean isEmpty = false;
689            if (nd.getNodeType() == Node.TEXT_NODE)
690            {
691                    String text = nd.getNodeValue().trim();
692                    if (text.length() == 0)
693                    {
694                            isEmpty = true;
695                    }
696            }
697            return isEmpty;
698        }
699    
700        /**
701         * Returns the value of the node's attribute named <name>
702         */
703        private String getAttribute(Node e, String name)
704        {
705            NamedNodeMap attrs = e.getAttributes();
706            int len = attrs.getLength();
707            if (len > 0)
708            {
709                    int i;
710                    for (i = 0; i < len; i++)
711                    {
712                            Node attr = attrs.item(i);
713                            if (name.equals(attr.getNodeName()))
714                            {
715                                    return attr.getNodeValue().trim();
716                            }
717                    }
718            }
719            //no such attribute
720            return null;
721        }
722    
723        /**
724         * Returns the value found in the Text node (if any) in the
725         * node list that's passed in.
726         */
727        private String getValue(Node nd)
728        {
729            NodeList nl = nd.getChildNodes();
730            int len = nl.getLength();
731            for (int i = 0; i < len; i++)
732            {
733                    Node n = nl.item(i);
734                    short type = n.getNodeType();
735                    if (type == Node.TEXT_NODE)
736                    {
737                            return n.getNodeValue().trim();
738                    }
739            }
740            // Didn't find a text node
741            return null;
742        }
743    }