<?xml version="1.0" encoding="UTF-8"?>
<!-- This file is the external DTD subset itself. It therefore starts with
     a text declaration (which has to name an encoding) and contains no
     DOCTYPE declaration of its own; filter files refer to this DTD from
     their DOCTYPE declaration. -->
<!-- 
 Document Type Definition (DTD) for WWW filters.
 See http://webcleaner.sourceforge.net/filter.html
 
 Changes:

 20071107 Bastian Kleineidam
 * introduce htmlrewrite and xmlrewrite, replacing the original rewrite rule
 * rename <enclosed> child of htmlrewrite with <matchcontent>
 * added match replacement part
 20041227 Bastian Kleineidam
 * added "antivirus" element
 20040308 Bastian Kleineidam
 * added the "filterstage" attribute to header rules
 20040225 Bastian Kleineidam
 * multi-language rule title and description
 20040219 Bastian Kleineidam
 * added matchurl and nomatchurl as elements, and restructured the other
   elements according to this
 20040106 Bastian Kleineidam
 * removed pics, added rating stuff
 20031031 Bastian Kleineidam
 * remove oid attr from all rules except folder
 20031029 Bastian Kleineidam
 * replace attributes [scheme, host, port, path, query, parameters, fragment]
   with url
 20030806 Bastian Kleineidam
 * removed lang attribute, it was not used
 20030730 Bastian Kleineidam
 * renamed file attribute to filename
 * changed oid attribute from #IMPLIED to #REQUIRED
 * added sid attribute
 20030705 Bastian Kleineidam
 * renamed replacer to replace
 * added pics element
 20030307 Bastian Kleineidam
 * added javascript filter rule
 23.7.2002 Bastian Kleineidam
 * added allowurls and allowdomains filters
 29.6.2002 Bastian Kleineidam
 * added blockurls and blockdomains filters
 7.6.2002 Bastian Kleineidam
 * added oid
 23.2.2002 Bastian Kleineidam
 * added matchurl and dontmatchurl attributes
 10.8.2001 Bastian Kleineidam
 * added Replacer
 
 WWW filters are configuration files for e.g. blocking URLs or rewriting
 HTML, image data or HTTP headers.
 Each file must have exactly one folder element.
 Each folder can have any number of rules (including zero).
 Filter files should have a .zap extension.
 In DATA sections, the following characters must be XML-encoded:
 Character	Code
   <             &lt;
   >             &gt;
   &             &amp;
   "             &quot;
   '             &apos;

-->


<!-- ===== folder =====

   A folder holds a list of filter rules.
   Content: one or more titles, any number of descriptions, then any
   number of rules in any order.
   The allowdomains and allowurls rules are part of the rule list as
   well, so that every rule type declared in this DTD can actually
   appear inside a folder.

   Attributes:
     sid      required
     oid      required
     disable  defaults to "0", i.e. the folder is enabled

   The folder, and every rule that declares a "disable" attribute, can
   be switched off with that attribute. This way configuration tools
   can display the description of all filters and rules and can
   enable/disable each of them separately.
-->
<!ELEMENT folder (
    title+,
    description*,
    ( htmlrewrite | xmlrewrite | replace | javascript
    | block | blockurls | blockdomains
    | allow | allowurls | allowdomains
    | header | image | nocomments | rating | antivirus )*
)>
<!ATTLIST folder
    sid      CDATA  #REQUIRED
    oid      CDATA  #REQUIRED
    disable  CDATA  "0">

<!-- ===== title and description =====

   Folders and rules carry at least one title and optionally some
   descriptions. Both can be given in several languages; the required
   "lang" attribute tells the variants apart.
-->
<!ELEMENT title ( #PCDATA )>
<!ATTLIST title
    lang  CDATA  #REQUIRED>
<!-- Description text; the required "lang" attribute selects the language
     variant, as for the title element. -->
<!ELEMENT description ( #PCDATA )>
<!ATTLIST description
    lang  CDATA  #REQUIRED>

<!-- ===== URL matching =====

   A rule can be restricted to a selected list of urls. The matchurl and
   nomatchurl elements each hold a regular expression: an url has to
   match the former, respectively must not match the latter, for the
   rule to be applied.
-->
<!ELEMENT matchurl ( #PCDATA )>
<!-- Regular expression an url must not match for the rule to be applied. -->
<!ELEMENT nomatchurl ( #PCDATA )>

<!-- ===== htmlrewrite =====

   The htmlrewrite rule applies to HTML tags and can replace (or delete,
   if the replacement data is empty) arbitrary HTML tag blocks.
   The tag name given in the "tag" attribute is a case insensitive
   string and defaults to "a".

   If there is no <replacement> child the complete tag will be removed.

   Attributes:
     sid      required
     disable  defaults to "0"
     tag      defaults to "a"
-->
<!ELEMENT htmlrewrite (
    title+,
    description*,
    matchurl*,
    nomatchurl*,
    attr*,
    matchcontent?,
    replacement?
)>
<!ATTLIST htmlrewrite
    sid      CDATA  #REQUIRED
    disable  CDATA  "0"
    tag      CDATA  "a">

<!-- ===== attr =====

   Names an attribute the tag must have.
   The "name" attribute holds the attribute name, a case insensitive
   string that defaults to "href".
   The element content is a regular expression for the attribute value.
-->
<!ELEMENT attr ( #PCDATA )>
<!ATTLIST attr
    name  CDATA  "href">

<!-- ===== matchcontent =====

   Holds a regular expression that is matched against the inner content
   block of a tag.
-->
<!ELEMENT matchcontent ( #PCDATA )>

<!-- ===== replacement =====

   The "part" attribute selects what gets replaced; it defaults to
   "complete".

   replace part   Result with replacement='foo', matchcontent='ext'
   ==========================================================
   tag            <blink>text</blink>  => footextfoo
   tagname        <blink>text</blink>  => <foo>text</foo>
   enclosed       <blink>text</blink>  => <blink>foo</blink>
   matched        <blink>text</blink>  => <blink>tfoo</blink>
   attr           <a href="bla">..</a> => <a foo>..</a>
   attrval        <a href="bla">..</a> => <a href="foo">..</a>
   complete       <a href="bla">..</a> => foo

   If you specified zero or more than one attribute to match,
   'attr' and 'attrval' replace the first occurring or matching
   attribute or nothing.
-->
<!ELEMENT replacement ( #PCDATA )>
<!ATTLIST replacement
    part  ( tag | tagname | attr | attrval | enclosed | matched | complete )  "complete">

<!-- ===== xmlrewrite =====

   The xmlrewrite rule applies to XML tags and can replace or delete
   arbitrary XML elements matched by an XPath expression.

   Attributes:
     sid          required
     disable      defaults to "0"
     selector     required
     replacetype  one of rsshtml or remove, defaults to "rsshtml"
     value        optional
-->
<!ELEMENT xmlrewrite (
    title+,
    description*,
    matchurl*,
    nomatchurl*
)>
<!ATTLIST xmlrewrite
    sid          CDATA                 #REQUIRED
    disable      CDATA                 "0"
    selector     CDATA                 #REQUIRED
    replacetype  ( rsshtml | remove )  "rsshtml"
    value        CDATA                 #IMPLIED>

<!-- ===== block =====

   Here we can specify regular expressions for urls to block.

   The element data specifies the URL to show when the block matches.
   Applications should have a default URL.

   Attributes:
     sid      required
     url      optional
     disable  defaults to "0"
-->
<!ELEMENT block (
    title+,
    description*,
    replacement?
)>
<!ATTLIST block
    sid      CDATA  #REQUIRED
    url      CDATA  #IMPLIED
    disable  CDATA  "0">

<!-- ===== blockdomains =====

   The traditional way of a lot of filter proxies is to block a list
   of domains. This filter specifies a filename with one domain per
   line.
   There is a bl2wc.py script to convert SquidGuard blacklists to
   WebCleaner .zap rules.
   The file can be gzip-compressed.

   Attributes:
     sid       required
     filename  optional
     disable   defaults to "0" (enabled); declared here so that this
               rule can be switched off like the other rules
-->
<!ELEMENT blockdomains (
    title+,
    description*,
    replacement?
)>
<!ATTLIST blockdomains
    sid       CDATA  #REQUIRED
    filename  CDATA  #IMPLIED
    disable   CDATA  "0">

<!-- ===== allowdomains =====

   Counterpart of blockdomains, for allowing entire domains.

   Attributes:
     sid       required
     filename  optional
     disable   defaults to "0" (enabled); declared here so that this
               rule can be switched off like the other rules
-->
<!ELEMENT allowdomains (
    title+,
    description*,
    replacement?
)>
<!ATTLIST allowdomains
    sid       CDATA  #REQUIRED
    filename  CDATA  #IMPLIED
    disable   CDATA  "0">

<!-- ===== blockurls =====

   SquidGuard has 'urls' files as blacklists specifying both host and
   path information.
   There is a bl2wc.py script to convert SquidGuard blacklists to
   WebCleaner .zap rules.
   The file can be gzip-compressed.

   Attributes:
     sid       required
     filename  optional
     disable   defaults to "0" (enabled); declared here so that this
               rule can be switched off like the other rules
-->
<!ELEMENT blockurls (
    title+,
    description*,
    replacement?
)>
<!ATTLIST blockurls
    sid       CDATA  #REQUIRED
    filename  CDATA  #IMPLIED
    disable   CDATA  "0">

<!-- ===== allowurls =====

   Counterpart of blockurls, for allowing urls.

   Attributes:
     sid       required
     filename  optional
     disable   defaults to "0" (enabled); declared here so that this
               rule can be switched off like the other rules
-->
<!ELEMENT allowurls (
    title+,
    description*,
    replacement?
)>
<!ATTLIST allowurls
    sid       CDATA  #REQUIRED
    filename  CDATA  #IMPLIED
    disable   CDATA  "0">


<!-- ===== allow =====

   Here we can specify urls to allow if they would be blocked
   otherwise.

   Attributes:
     sid      required
     url      optional
     disable  defaults to "0"
-->
<!ELEMENT allow (
    title+,
    description*
)>
<!ATTLIST allow
    sid      CDATA  #REQUIRED
    url      CDATA  #IMPLIED
    disable  CDATA  "0">

<!-- ===== header =====

   Modify HTTP headers. If the replacement value is missing, the header
   gets deleted. Otherwise the specified header is added or replaced.

   Attributes:
     sid          required
     name         required
     disable      defaults to "0"
     filterstage  one of request, response or both; defaults to "request"
-->
<!ELEMENT header (
    title+,
    description*,
    matchurl*,
    nomatchurl*,
    replacement?
)>
<!ATTLIST header
    sid          CDATA                        #REQUIRED
    name         CDATA                        #REQUIRED
    disable      CDATA                        "0"
    filterstage  ( request | response | both )  "request">

<!-- ===== image =====

   Block images with a certain size. A replacement url can be specified.

   Attributes:
     sid      required
     width    optional
     height   optional
     type     one of gif, jpg or png; defaults to "gif"
     disable  defaults to "0"
-->
<!ELEMENT image (
    title+,
    description*,
    matchurl*,
    nomatchurl*,
    replacement?
)>
<!ATTLIST image
    sid      CDATA                #REQUIRED
    width    CDATA                #IMPLIED
    height   CDATA                #IMPLIED
    type     ( gif | jpg | png )  "gif"
    disable  CDATA                "0">

<!-- ===== nocomments =====

   Remove comments from HTML source.

   Attributes:
     sid      required
     disable  defaults to "0"
-->
<!ELEMENT nocomments (
    title+,
    description*,
    matchurl*,
    nomatchurl*
)>
<!ATTLIST nocomments
    sid      CDATA  #REQUIRED
    disable  CDATA  "0">

<!-- ===== javascript =====

   Execute and filter JavaScript in HTML source.

   Attributes:
     sid      required
     disable  defaults to "0"
-->
<!ELEMENT javascript (
    title+,
    description*,
    matchurl*,
    nomatchurl*
)>
<!ATTLIST javascript
    sid      CDATA  #REQUIRED
    disable  CDATA  "0">

<!-- ===== replace =====

   Replace regular expressions in a data stream.

   Attributes:
     sid      required
     disable  defaults to "0"
     search   required
-->
<!ELEMENT replace (
    title+,
    description*,
    matchurl*,
    nomatchurl*,
    replacement?
)>
<!ATTLIST replace
    sid      CDATA  #REQUIRED
    disable  CDATA  "0"
    search   CDATA  #REQUIRED>

<!-- ===== rating =====

   A rule enabling the rating system in WebCleaner.

   Attributes:
     sid      required
     disable  defaults to "0"
-->
<!ELEMENT rating (
    title+,
    description*,
    matchurl*,
    nomatchurl*,
    url?,
    category*
)>
<!ATTLIST rating
    sid      CDATA  #REQUIRED
    disable  CDATA  "0">

<!-- url child of a rating rule.

   The rating content model refers to this element, so it has to be
   declared for documents that use it to validate.
   NOTE(review): it is declared as plain character data on the
   assumption that it holds an url string; confirm against the rating
   implementation.
-->
<!ELEMENT url ( #PCDATA )>

<!-- ===== rating category =====

   A category has an identification label (the required "name"
   attribute) and a rating value as content.
-->
<!ELEMENT category ( #PCDATA )>
<!ATTLIST category
    name  CDATA  #REQUIRED>

<!-- ===== antivirus =====

   A rule enabling virus scanning in WebCleaner.

   Attributes:
     sid      required
     disable  defaults to "0"
-->
<!ELEMENT antivirus (
    title+,
    description*,
    matchurl*,
    nomatchurl*
)>
<!ATTLIST antivirus
    sid      CDATA  #REQUIRED
    disable  CDATA  "0">

