(* $Id: netmime.mli,v 1.6 2002/06/09 11:35:16 stolpmann Exp $
 * ----------------------------------------------------------------------
 *
 *)

(* Netmime contains high-level classes and functions to process
 * MIME messages.
 *)

(****************************** Types *********************************)

open Netchannels

type store =
  [ `Memory
  | `File of string
      (* The string is the filename of a file containing the (decoded) value
       * of the body
       *)
  ]

exception Immutable of string
  (* Raised if it is tried to modify a read-only value. The string denotes
   * the function or method where the incident happened.
   *)


(* MIME headers and bodies are defined in two steps. First the subtype
 * describing read access is defined (mime_header_ro, and mime_body_ro),
 * and after that the full class type including write access is defined
 * (mime_header, and mime_body).
 *
 * The idea is that you can write functions that take an ro value as
 * input to indicate that they do not modify the value. For example:
 * 
 * let number_of_fields (h:#mime_header_ro) =
 *   List.length (h#fields)
 *
 * This function accepts both mime_header, and mime_header_ro values as
 * input, but the typing ensures that the function does not mutate anything.
 *
 * There is another way to ensure that a header or body is not modified.
 * The read-only flag can be set when creating the object, and this flag
 * causes that all trials to modify the value will raise the exception
 * Immutable. Of course, such trials of mutation are only detected at
 * run-time.
 *
 * The advantage of the read-only flag is that it even works if 
 * mutation depends on a condition, but it can be ensured that this
 * condition is never true. Furthermore, typing is much simpler (getting
 * subtyping correct can be annoying).
 *)

class type mime_header_ro =             
object
  (* read-only view of a mime_header *)

  method fields : (string * string) list
  method field  : string -> string
  method multiple_field : string -> string list
    (* The current fields of the header. [fields] returns the complete
     * header. [field name] returns the value of the field, or raises
     * Not_found. [multiple_field name] returns all fields with the same
     * name.
     * Note that field names are case-insensitive; [field "content-length"],
     * and [field "CONTENT-LENGTH"] will return the same field. However,
     * the method [fields] returns the original field names, without
     * adjustment of the case.
     * The order of the fields is preserved.
     *)

  (* --------------------- Standard fields ----------------------- *)

  (* The following methods will raise Not_found if the fields are not
   * present.
   *)

  method content_length : unit -> int

  method content_type : 
           unit -> (string * (string * Mimestring.s_param)list)
    (* [content_type] parses this field, and returns 
     * (main_type, [ paramname1,paramvalue1; ...; paramnameN,paramvalueN ]).
     *)

  method content_disposition : 
           unit -> (string * (string * Mimestring.s_param)list)
    (* [content_disposition] parses this field, and returns 
     * (main_disp, [ paramname1,paramvalue1; ...; paramnameN,paramvalueN ]).
     *)

  method content_transfer_encoding : unit -> string
end


class type mime_header = 
object
  (* A mutable or immutable mime_header *)

  inherit mime_header_ro

  method ro : bool
    (* whether the header is read-only or not *)

  method set_fields : (string * string) list -> unit
  method update_field : string -> string -> unit
  method update_multiple_field : string -> string list -> unit
  method delete_field : string -> unit
    (* These methods modify the fields of the header. If the header is
     * read-only, the exception Immutable will be raised.
     * [set_fields] replaces the current fields with a new list of
     * (name,value) pairs. [update_field name value] replaces all fields
     * of the passed name with the single setting (name,value), or
     * adds this setting to the list. [update_multiple_field name values]
     * replaces all fields of the passed name with the list of values,
     * or adds this list. Finally, [delete_field name] deletes all
     * fields of the passed name. Nothing happens if there is no such
     * field.
     *)

end


class type mime_body_ro =
object
  (* a read-only view of a mime_body *)

  method value : string
    (* Also known as "body". The [value] method returns the _decoded_ body,
     * i.e. transfer encodings are removed.
     *)

  method store : store
    (* Where the body is actually stored. *)

  method open_value_rd : unit -> in_obj_channel
    (* Opens the value for reading. *)

  method finalize : unit -> unit
    (* After the body has been finalized, it cannot be accessed any longer.
     * External resources (files) are deallocated.
     *)
end


class type mime_body =
object
  (* A mutable or immutable mime_body *)

  inherit mime_body_ro

  method ro : bool
    (* whether this body is read-only or not *)

  method set_value : string -> unit
    (* Sets the value. If the value is immutable, the exception
     * Immutable will be raised
     *)

  method open_value_wr : unit -> out_obj_channel
    (* Opens the value for writing. The current value is overwritten. 
     * If the value is immutable, the exception Immutable will be raised.
     *)

end


(* One can consider the pair (mime_header, mime_body) as simple MIME
 * message. The type [complex_mime_message] describes a message that
 * can be either a simple message or a multipart message.
 *
 * Of course, there is also the read-only variant. 
 *)


type complex_mime_message = mime_header * complex_mime_body
and complex_mime_body =
  [ `Body of mime_body
  | `Parts of complex_mime_message list
  ]
  (* A complex_mime_message can have (nested) multipart structure. *)

type complex_mime_message_ro = mime_header_ro * complex_mime_body_ro
and complex_mime_body_ro =
  [ `Body of mime_body_ro
  | `Parts of complex_mime_message_ro list
  ]
  (* The read-only view of a complex_mime_message *)


(* Note: `Parts [], i.e. `Parts together with an empty list, is illegal *)


type mime_message = mime_header * [ `Body of mime_body ]

type mime_message_ro = mime_header_ro * [ `Body of mime_body_ro ]

(************************** Representations ***************************)

class basic_mime_header : ?ro:bool -> (string * string) list -> mime_header
  (* The argument is the list of (name,value) pairs of the header. 
   * ~ro: whether the header is read-only (default:false)
   *
   * This [mime_header] implementation bases on the [Map] module as data
   * structure. The efficiency of the operations:
   * - [new], [set_fields]: O(n log n), but the construction of the dictionary
   *   is deferred until the first real access
   * - [field], [multiple_field]: O(log n)
   * - [fields]: O(n log n)
   * - [update_field], [update_multiple_field]: O(log n)
   * - [delete_field]: O(n)
   *)

class memory_mime_body : ?ro:bool -> string -> mime_body
  (* The argument is the (decoded) body.
   * The method [store] returns [`Memory].
   *
   * ~ro: whether the body is read-only (default:false)
   *)


class file_mime_body : ?ro:bool -> ?fin:bool -> string -> mime_body
  (* The argument is the name of the file containing the (decoded) body. 
   * The method [store] returns [`File filename].
   * The method [value] loads the contents of the file and returns them
   * as string.
   *
   * ~ro: whether the body is read-only (default:false)
   * ~fin: whether to delete the file when the [finalize] method is called
   *   (default:false)
   *)

(**********************************************************************)

val read_mime_header :
      ?unfold:bool ->                        (* default: false *)
      ?strip:bool ->                         (* default: true *)
      ?ro:bool ->                            (* default: false *)
      Netstream.in_obj_stream -> 
	mime_header
  (* Decodes the MIME header that begins at the current position of the
   * netstream, and return the header as class [basic_mime_header].
   * The stream is positioned at the byte following the empty line 
   * terminating the header.
   *
   * ~unfold: whether linefeeds are replaced by spaces in the values of the
   *   header fields (Note: defaults to [false] here in contrast to
   *   Mimestring.scan_header!)
   * ~strip: whether whitespace at the beginning and at the end of the 
   *   header fields is stripped
   *)

type multipart_style = [ `None | `Flat | `Deep ]
  (* How to parse multipart messages:
   * `None: Do not handle multipart messages specially. The multipart body
   *    is returned as any other body.
   *    [read_mime_message] will return values of the form 
   *    (header, `Body body)
   * `Flat: If the top-level message is a multipart message, the parts
   *    are separated and returned as list. If the parts are again multipart
   *    messages, these inner multipart messages are not decoded but returned
   *    as single message.
   *    [read_mime_message] will return values of the form
   *    (header, `Body body), or
   *    (header, `Parts [ subhdr1, `Body subbdy1; ...; subhdrN, `Body subbdyN ])
   * `Deep: Multipart messages are recursively decoded and returned as
   *    tree structure.
   *    [read_mime_message] will return values of no special form (general
   *    case)
   *)

val decode_mime_body : #mime_header_ro -> out_obj_channel -> out_obj_channel
  (* let ch' = decode_mime_body hdr ch:
   * According to the value of the Content-transfer-encoding header field
   * in [hdr] the encoded MIME body written to ch' is decoded and transferred
   * to ch.
   * Handles 7bit,8bit,binary,quoted-printable,base64.
   *)


val encode_mime_body : #mime_header_ro -> out_obj_channel -> out_obj_channel
  (* let ch' = encode_mime_body hdr ch:
   * According to the value of the Content-transfer-encoding header field
   * in [hdr] the unencoded MIME body written to ch' is encoded and transferred
   * to ch.
   * Handles 7bit,8bit,binary,quoted-printable,base64.
   * KNOWN PROBLEM: The implementation does not limit the length of the lines 
   * produced for quoted-printable.
   *)


val storage : ?ro:bool -> ?fin:bool -> store -> (mime_body * out_obj_channel)
  (* Creates a new storage facility for a mime body according to [store].
   * See ~storage_style below.
   *
   * ~ro: whether the returned mime_bodies are read-only or not. Note that
   *   it is always possible to write into the body using the returned
   *   out_obj_channel regardless of the value of ~ro.
   *   Default: false
   * ~fin: whether to finalize bodies stored in files.
   *   Default: false
   *)

val read_mime_message : 
      ?unfold:bool ->                                     (* Default: false *)
      ?strip:bool ->                                      (* default: true *)
      ?ro:bool ->                                         (* Default: false *)
      ?multipart_style:multipart_style ->                 (* Default: `Deep *)
      ?storage_style:(mime_header -> (mime_body * out_obj_channel)) ->
      Netstream.in_obj_stream -> 
        complex_mime_message

  (* Decodes the MIME message that begins at the current position of the
   * passed netstream, and that continues until EOF.
   *
   * Multipart messages are decoded as specified by ~multipart_style (see
   * above).
   *
   * Message bodies with content-transfer-encodings of 7bit, 8bit, binary,
   * base64, and quoted-printable can be processed. The bodies are stored
   * without content-transfer-encoding (i.e. in decoded form), but the
   * content-transfer-encoding header field is not removed from the header.
   *
   * The ~storage_style function determines where every message body is
   * stored. The corresponding header of the body is passed to the function
   * as argument; the result of the function is a pair of a new mime_body
   * and an out_obj_channel writing into this body. You can create such a
   * pair by calling [storage]. 
   *
   * By default, the ~storage_style is [storage ?ro `Memory] for every header. 
   * Here, the designator [`Memory] means that the body will be stored in a
   * string. The designator [`File fn] means that the body will be stored in the
   * file [fn]. The file will be created if it does not yet exist, and
   * it will be overwritten if it does already exist.
   *
   * Note that the ~storage_style function is called for every non-multipart
   * body.
   *
   * Large message bodies (> maximum string length) are supported if the
   * bodies are stored in files. The memory consumption is optimized for
   * this case, and usually only a small constant amount of memory is needed.
   *
   * ~unfold: whether linefeeds are replaced by spaces in the values of the
   *   header fields (Note: defaults to [false] here in contrast to
   *   Mimestring.scan_header!)
   *
   * ~strip: whether whitespace at the beginning and at the end of the 
   *   header fields is stripped
   *
   * ~ro: Whether the created MIME headers are read-only or not. Furthermore,
   *   the default ~storage_style uses this parameter for the MIME bodies, too.
   *   However, the MIME bodies may have a different read-only flag in general.
   *
   * EXAMPLE:
   *
   * Parse the MIME message stored in the file f:
   *
   * let m = read_mime_message 
   *           (new input_stream (new input_channel (open_in f)))
   *
   *)

  (* TODO: what about messages with type "message/*"? It may be possible that
   * they can be recursively decoded, but it is also legal for some media
   * types that they are "partial".
   * Currently the type "message/*" is NOT decoded.
   *)

val write_mime_message :
      ?wr_header:bool ->                       (* default: true *)
      ?wr_body:bool ->                         (* default: true *)
      ?nr:int ->                               (* default: 0 *)
      ?ret_boundary:string ref ->              (* default: do not return it *)
      Netchannels.out_obj_channel ->
      complex_mime_message ->
        unit
  (* Writes the MIME message to the output channel. The content-transfer-
   * encoding of the leaves is respected, and their bodies are encoded
   * accordingly. The content-transfer-encoding of multipart messages is
   * always "fixed", i.e. set to "7bit", "8bit", or "binary" depending
   * on the contents.
   *
   * The function fails if multipart messages do not have a multipart
   * content type field (i.e. the content type does not begin with "multipart").
   * If only the boundary parameter is missing, a good boundary parameter is
   * added to the content type. "Good" means here that it is impossible
   * that the boundary string occurs in the message body if the
   * content-transfer-encoding is quoted-printable or base64, and that
   * such an occurrence is very unlikely if the body is not encoded.
   * If the whole content type field is missing, a "multipart/mixed" type
   * with a boundary parameter is added to the printed header.
   *
   * Note that already existing boundaries are used, no matter whether
   * they are of good quality or not.
   *
   * No other header fields are added, deleted or modified. These
   * modifications are _not_ written back to the passed MIME message.
   *
   * It is possible in some cases that the boundary does not work (both
   * the existing boundary, and the added boundary). This causes that a wrong
   * and unparseable MIME message is written.
   *
   * Note that if the passed message is a simple message like (_,`Body _),
   * and if no content-transfer-encoding is set, the written message might
   * not end with a linefeed character.
   *
   * ~wr_header: If true, the outermost header is written. Inner headers
   *   of the message parts are written unless ~wr_body=false.
   * ~wr_body: If true, the body of the whole message is written; if false,
   *   no body is written at all.
   * ~nr: This argument sets the counter that is included in generated
   *   boundaries to a certain minimum value.
   * ~ret_boundary: if passed, the boundary of the outermost multipart
   *   message is written to this reference. (Internally used.)
   *)


(* ======================================================================
 * History:
 * 
 * $Log: netmime.mli,v $
 * Revision 1.6  2002/06/09 11:35:16  stolpmann
 * 	Passing ~strip as suggested by Matt Armstrong.
 *
 * Revision 1.5  2002/01/23 22:17:06  stolpmann
 * 	new functions: [encode_mime_body], [write_mime_message]
 *
 * Revision 1.4  2002/01/21 00:47:00  stolpmann
 * 	Improved comments.
 *
 * Revision 1.3  2002/01/14 01:08:03  stolpmann
 * 	Added type mime_message.
 * 	Removed the complement_* functions from the interface. The problem
 * is that the resulting objects are not really read-only.
 * 	New: decode_mime_body (extracted from read_mime_message)
 *
 * Revision 1.2  2002/01/12 18:36:17  stolpmann
 * 	Revised the whole module.
 *
 * Revision 1.1  2002/01/07 02:17:22  stolpmann
 * 	Initial revision.
 *
 * 
 *)
