exported from svn
This commit is contained in:
commit
03995d3bc6
85 changed files with 14765 additions and 0 deletions
248
scripts/url_syntax.php
Normal file
248
scripts/url_syntax.php
Normal file
|
@ -0,0 +1,248 @@
|
|||
<?php
|
||||
|
||||
function check_url ($url2check, $types) {
|
||||
|
||||
# Be paranoid about using grouping!
|
||||
|
||||
$nz_digit = '[1-9]';
|
||||
$nz_digits = "(?:$nz_digit\\d*)";
|
||||
$digits = '(?:\d+)';
|
||||
$space = '(?:%20)';
|
||||
$nl = '(?:%0[Aa])';
|
||||
$dot = '\.';
|
||||
$plus = '\+';
|
||||
$qm = '\?';
|
||||
$ast = '\*';
|
||||
$hex = '[a-fA-F\d]';
|
||||
$alpha = '[a-zA-Z]'; # No, no locale.
|
||||
$alphas = "(?:${alpha}+)";
|
||||
$alphanum = '[a-zA-Z\d]'; # Letter or digit.
|
||||
$xalphanum = "(?:${alphanum}|%(?:3\\d|[46]$hex|[57][Aa\\d]))";
|
||||
# Letter or digit, or hex escaped letter/digit.
|
||||
$alphanums = "(?:${alphanum}+)";
|
||||
$escape = "(?:%$hex\{2})";
|
||||
$safe = '[$\-_.+]';
|
||||
$extra = "[\!*'(),]";
|
||||
$national = '[{}|\\^~[\]`]';
|
||||
$punctuation = '[<>#%"]';
|
||||
$reserved = '[;/?:@&=]';
|
||||
$uchar = "(?:${alphanum}|${safe}|${extra}|${escape})";
|
||||
$xchar = "(?:${alphanum}|${safe}|${extra}|${reserved}|${escape})";
|
||||
$uchar = str_replace (']|[', '', $uchar); // Make string smaller, and speed up regex.
|
||||
$uchar = str_replace (']|[', '', $xchar); // Make string smaller, and speed up regex.
|
||||
|
||||
# URL schemeparts for ip based protocols:
|
||||
$user = "(?:(?:${uchar}|[;?&=])*)";
|
||||
$password = "(?:(?:${uchar}|[;?&=])*)";
|
||||
$hostnumber = "(?:${digits}(?:${dot}${digits}){3})";
|
||||
$toplabel = "(?:${alpha}(?:(?:${alphanum}|-)*${alphanum})?)";
|
||||
$domainlabel = "(?:${alphanum}(?:(?:${alphanum}|-)*${alphanum})?)";
|
||||
$hostname = "(?:(?:${domainlabel}${dot})*${toplabel})";
|
||||
$host = "(?:${hostname}|${hostnumber})";
|
||||
$hostport = "(?:${host}(?::${digits})?)";
|
||||
$login = "(?:(?:${user}(?::${password})?\@)?${hostport})";
|
||||
|
||||
# The predefined schemes:
|
||||
|
||||
# FTP (see also RFC959)
|
||||
$fsegment = "(?:(?:${uchar}|[?:\@&=])*)";
|
||||
$fpath = "(?:${fsegment}(?:/${fsegment})*)";
|
||||
$ftpurl = "(?:ftp://${login}(?:/${fpath}(?:;type=[AIDaid])?)?)";
|
||||
|
||||
# FILE
|
||||
$fileurl = "(?:file://(?:${host}|localhost)?/${fpath})";
|
||||
|
||||
# HTTP
|
||||
$hsegment = "(?:(?:${uchar}|[~;:\@&=])*)";
|
||||
$search = "(?:(?:${uchar}|[;:\@&=])*)";
|
||||
$hpath = "(?:${hsegment}(?:/${hsegment})*)";
|
||||
$httpurl = "(?:https?://${hostport}(?:/${hpath}(?:${qm}${search})?)?)";
|
||||
|
||||
# GOPHER (see also RFC1436)
|
||||
$gopher_plus = "(?:${xchar}*)";
|
||||
$selector = "(?:${xchar}*)";
|
||||
$gtype = $xchar; // Omitted parens!
|
||||
$gopherurl = "(?:gopher://${hostport}(?:/${gtype}(?:${selector}" .
|
||||
"(?:%09${search}(?:%09${gopher_plus})?)?)?)?)";
|
||||
|
||||
# MAILTO (see also RFC822)
|
||||
$encoded822addr = "(?:$xchar+)";
|
||||
$mailtourl = "(?:mailto:$encoded822addr)";
|
||||
$mailtonpurl = $encoded822addr;
|
||||
|
||||
# NEWS (see also RFC1036)
|
||||
$article = "(?:(?:${uchar}|[;/?:&=])+\@${host})";
|
||||
$group = "(?:${alpha}(?:${alphanum}|[_.+-])*)";
|
||||
$grouppart = "(?:${article}|${group}|${ast})";
|
||||
$newsurl = "(?:news:${grouppart})";
|
||||
|
||||
# NNTP (see also RFC977)
|
||||
$nntpurl = "(?:nntp://${hostport}/${group}(?:/${digits})?)";
|
||||
|
||||
# TELNET
|
||||
$telneturl = "(?:telnet://${login}/?)";
|
||||
|
||||
# WAIS (see also RFC1625)
|
||||
$wpath = "(?:${uchar}*)";
|
||||
$wtype = "(?:${uchar}*)";
|
||||
$database = "(?:${uchar}*)";
|
||||
$waisdoc = "(?:wais://${hostport}/${database}/${wtype}/${wpath})";
|
||||
$waisindex = "(?:wais://${hostport}/${database}${qm}${search})";
|
||||
$waisdatabase = "(?:wais://${hostport}/${database})";
|
||||
# $waisurl = "(?:${waisdatabase}|${waisindex}|${waisdoc})";
|
||||
# Speed up: the 3 types share a common prefix.
|
||||
$waisurl = "(?:wais://${hostport}/${database}" .
|
||||
"(?:(?:/${wtype}/${wpath})|${qm}${search})?)";
|
||||
|
||||
# PROSPERO
|
||||
$fieldvalue = "(?:(?:${uchar}|[?:\@&])*)";
|
||||
$fieldname = "(?:(?:${uchar}|[?:\@&])*)";
|
||||
$fieldspec = "(?:;${fieldname}=${fieldvalue})";
|
||||
$psegment = "(?:(?:${uchar}|[?:\@&=])*)";
|
||||
$ppath = "(?:${psegment}(?:/${psegment})*)";
|
||||
$prosperourl = "(?:prospero://${hostport}/${ppath}(?:${fieldspec})*)";
|
||||
|
||||
# LDAP (see also RFC1959)
|
||||
# First. import stuff from RFC 1779 (Distinguished Names).
|
||||
# We've modified things a bit.
|
||||
$dn_separator = "(?:[;,])";
|
||||
$dn_optional_space = "(?:${nl}?${space}*)";
|
||||
$dn_spaced_separator = "(?:${dn_optional_space}${dn_separator}" .
|
||||
"${dn_optional_space})";
|
||||
$dn_oid = "(?:${digits}(?:${dot}${digits})*)";
|
||||
$dn_keychar = "(?:${xalphanum}|${space})";
|
||||
$dn_key = "(?:${dn_keychar}+|(?:OID|oid)${dot}${dn_oid})";
|
||||
$dn_string = "(?:${uchar}*)";
|
||||
$dn_attribute = "(?:(?:${dn_key}${dn_optional_space}=" .
|
||||
"${dn_optional_space})?${dn_string})";
|
||||
$dn_name_component = "(?:${dn_attribute}(?:${dn_optional_space}" .
|
||||
"${plus}${dn_optional_space}${dn_attribute})*)";
|
||||
$dn_name = "(?:${dn_name_component}" .
|
||||
"(?:${dn_spaced_separator}${dn_name_component})*" .
|
||||
"${dn_spaced_separator}?)";
|
||||
|
||||
# RFC 1558 defines the filter syntax, but that requires a PDA to recognize.
|
||||
# Since that's too powerful for Perl's REs, we allow any char between the
|
||||
# parenthesis (which have to be there.)
|
||||
$ldap_filter = "(?:\(${xchar}+\))";
|
||||
|
||||
# This is from RFC 1777. It defines an attributetype as an 'OCTET STRING',
|
||||
# whatever that is.
|
||||
$ldap_attr_type = "(?:${uchar}+)"; # I'm just guessing here.
|
||||
# The RFCs aren't clear.
|
||||
|
||||
# Now we are at the grammar of RFC 1959.
|
||||
$ldap_attr_list = "(?:${ldap_attr_type}(?:,${ldap_attr_type})*)";
|
||||
$ldap_attrs = "(?:${ldap_attr_list}?)";
|
||||
|
||||
$ldap_scope = "(?:base|one|sub)";
|
||||
$ldapurl = "(?:ldap://(?:${hostport})?/${dn_name}" .
|
||||
"(?:${qm}${ldap_attrs}" .
|
||||
"(?:${qm}${ldap_scope}(?:${qm}${ldap_filter})?)?)?)";
|
||||
|
||||
|
||||
# RFC 2056 defines the format of URLs for the Z39.50 protocol.
|
||||
$z_database = "(?:${uchar}+)";
|
||||
$z_docid = "(?:${uchar}+)";
|
||||
$z_elementset = "(?:${uchar}+)";
|
||||
$z_recordsyntax = "(?:${uchar}+)";
|
||||
$z_scheme = "(?:z39${dot}50[rs])";
|
||||
$z39_50url = "(?:${z_scheme}://${hostport}" .
|
||||
"(?:/(?:${z_database}(?:${plus}${z_database})*" .
|
||||
"(?:${qm}${z_docid})?)?" .
|
||||
"(?:;esn=${z_elementset})?" .
|
||||
"(?:;rs=${z_recordsyntax}" .
|
||||
"(?:${plus}${z_recordsyntax})*)?))";
|
||||
|
||||
|
||||
# RFC 2111 defines the format for cid/mid URLs.
|
||||
$url_addr_spec = "(?:(?:${uchar}|[;?:@&=])*)";
|
||||
$message_id = $url_addr_spec;
|
||||
$content_id = $url_addr_spec;
|
||||
$cidurl = "(?:cid:${content_id})";
|
||||
$midurl = "(?:mid:${message_id}(?:/${content_id})?)";
|
||||
|
||||
|
||||
# RFC 2122 defines the Vemmi URLs.
|
||||
$vemmi_attr = "(?:(?:${uchar}|[/?:@&])*)";
|
||||
$vemmi_value = "(?:(?:${uchar}|[/?:@&])*)";
|
||||
$vemmi_service = "(?:(?:${uchar}|[/?:@&=])*)";
|
||||
$vemmi_param = "(?:;${vemmi_attr}=${vemmi_value})";
|
||||
$vemmiurl = "(?:vemmi://${hostport}" .
|
||||
"(?:/${vemmi_service}(?:${vemmi_param}*))?)";
|
||||
|
||||
# RFC 2192 for IMAP URLs.
|
||||
# Import from RFC 2060.
|
||||
# $imap4_astring = "";
|
||||
# $imap4_search_key = "";
|
||||
# $imap4_section_text = "";
|
||||
$imap4_nz_number = $nz_digits;
|
||||
$achar = "(?:${uchar}|[&=~])";
|
||||
$bchar = "(?:${uchar}|[&=~:\@/])";
|
||||
$enc_auth_type = "(?:${achar}+)";
|
||||
$enc_list_mbox = "(?:${bchar}+)";
|
||||
$enc_mailbox = "(?:${bchar}+)";
|
||||
$enc_search = "(?:${bchar}+)";
|
||||
$enc_section = "(?:${bchar}+)";
|
||||
$enc_user = "(?:${achar}+)";
|
||||
$i_auth = "(?:;[Aa][Uu][Tt][Hh]=(?:${ast}|${enc_auth_type}))";
|
||||
$i_list_type = "(?:[Ll](?:[Ii][Ss][Tt]|[Ss][Uu][Bb]))";
|
||||
$i_mailboxlist = "(?:${enc_list_mbox}?;[Tt][Yy][Pp][Ee]=${i_list_type})";
|
||||
$i_uidvalidity = "(?:;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=" .
|
||||
"${imap4_nz_number})";
|
||||
$i_messagelist = "(?:${enc_mailbox}(?:${qm}${enc_search})?" .
|
||||
"(?:${i_uidvalidity})?)";
|
||||
$i_section = "(?:/;[Ss][Ee][Cc][Tt][Ii][Oo][Nn]=${enc_section})";
|
||||
$i_uid = "(?:/;[Uu][Ii][Dd]=${imap4_nz_number})";
|
||||
$i_messagepart = "(?:${enc_mailbox}(?:${i_uidvalidity})?${i_uid}" .
|
||||
"(?:${i_section})?)";
|
||||
$i_command = "(?:${i_mailboxlist}|${i_messagelist}|${i_messagepart})";
|
||||
$i_userauth = "(?:(?:${enc_user}(?:${i_auth})?)|" .
|
||||
"(?:${i_auth}(?:${enc_user})?))";
|
||||
$i_server = "(?:(?:${i_userauth}\@)?${hostport})";
|
||||
$imapurl = "(?:imap://${i_server}/(?:$i_command)?)";
|
||||
|
||||
# RFC 2224 for NFS.
|
||||
$nfs_mark = '[\$\-_.\!~*\'(),]';
|
||||
$nfs_unreserved = "(?:${alphanum}|${nfs_mark})";
|
||||
$nfs_unreserved = str_replace (']|[', '', $nfs_unreserved); // Make string smaller, and speed up regex.
|
||||
$nfs_pchar = "(?:${nfs_unreserved}|${escape}|[:\@&=+])";
|
||||
$nfs_segment = "(?:${nfs_pchar}*)";
|
||||
$nfs_path_segs = "(?:${nfs_segment}(?:/${nfs_segment})*)";
|
||||
$nfs_url_path = "(?:/?${nfs_path_segs})";
|
||||
$nfs_rel_path = "(?:${nfs_path_segs}?)";
|
||||
$nfs_abs_path = "(?:/${nfs_rel_path})";
|
||||
$nfs_net_path = "(?://${hostport}(?:${nfs_abs_path})?)";
|
||||
$nfs_rel_url = "(?:${nfs_net_path}|${nfs_abs_path}|${nfs_rel_path})";
|
||||
$nfsurl = "(?:nfs:${nfs_rel_url})";
|
||||
|
||||
$valid_types = array (
|
||||
'http', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'wais', 'mailto',
|
||||
'mailtonp', 'file', 'prospero', 'ldap', 'z39_50', 'cid', 'mid', 'vemmi',
|
||||
'imap', 'nfs'
|
||||
);
|
||||
|
||||
# Combining all the different URL formats into a single regex.
|
||||
|
||||
$valid = false;
|
||||
|
||||
if (!is_array ($types)) {
|
||||
$types = array ($types);
|
||||
}
|
||||
|
||||
foreach ($types as $type) {
|
||||
if (!in_array ($type, $valid_types)) {
|
||||
continue;
|
||||
}
|
||||
$re = $type.'url';
|
||||
if (preg_match ('!^'.$$re.'$!i', $url2check)) {
|
||||
$valid = $type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $valid;
|
||||
|
||||
}
|
||||
|
||||
?>
|
Loading…
Add table
Add a link
Reference in a new issue