url - 检查字符串是否为有效的URL, 最好的正则表达式是什么?

  显示原文与译文双语对照的内容

如何检查给定字符串是否是有效的URL地址?

我对正则表达式的了解很基础,不足以让我从网上看到的数百个正则表达式中做出选择。

时间:

我按照 RFC 3987 ( http://www.faqs.org/rfcs/rfc3987.html ) 编写了自己的 URL 匹配模式(实际上匹配的是 IRI,即国际化资源标识符)。这些模式采用 PCRE 语法。

用于绝对 IRI(国际化资源标识符)的模式:


/^[a-z](?:[-a-z0-9+.])*:(?://(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:])*@)?(?:[(?:(?:(?:[0-9a-f]{1,4}:){6}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|::(?:[0-9a-f]{1,4}:){5}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4}:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|v[0-9a-f]+[-a-z0-9._~!$&'()*+,;=:]+)]|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}|(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{
FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=@])*)(?::[0-9]*)?(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|/(?:(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*)?|(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|(?!(?:%[0-9a-f
][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])))(?:?(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[x{E000}-x{F8FF}x{F0000}-x{FFFFD}|x{100000}-x{10FFFD}/?])*)?(?:#(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[/?])*)?$/i

同时允许相对 IRI 的模式:


/^(?:[a-z](?:[-a-z0-9+.])*:(?://(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:])*@)?(?:[(?:(?:(?:[0-9a-f]{1,4}:){6}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|::(?:[0-9a-f]{1,4}:){5}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4}:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|v[0-9a-f]+[-a-z0-9._~!$&'()*+,;=:]+)]|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}|(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}
-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=@])*)(?::[0-9]*)?(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|/(?:(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*)?|(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|(?!(?:%[0-9
a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])))(?:?(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[x{E000}-x{F8FF}x{F0000}-x{FFFFD}|x{100000}-x{10FFFD}/?])*)?(?:#(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[/?])*)?|(?://(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:])*@)?(?:[(?:(?:(?:[0-9a-f]{1,4}:){6}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|::(?:[0-9a-f]{1,4}:){5}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4}:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}(?:[0-9a-f]{1
,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|v[0-9a-f]+[-a-z0-9._~!$&'()*+,;=:]+)]|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}|(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=@])*)(?::[0-9]*)?(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|/(?:(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:
@]))+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*)?|(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=@])+)(?:/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@]))*)*|(?!(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])))(?:?(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[x{E000}-x{F8FF}x{F0000}-x{FFFFD}|x{100000}-x{10FFFD}/?])*)?(?:#(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9._~x{A0}-x{D7FF}x{F900}-x{FDCF}x{FDF0}-x{FFEF}x{10000}-x{1FFFD}x{20000}-x{2FFFD}x{30000}-x{3FFFD}x{40000}-x{4FFFD}x{50000}-x{5FFFD}x{60000}-x{6FFFD}x{70000}-x{7FFFD}x{80000}-x{8FFFD}x{90000}-x{9FFFD}x{A0000}-x{AFFFD
}x{B0000}-x{BFFFD}x{C0000}-x{CFFFD}x{D0000}-x{DFFFD}x{E1000}-x{EFFFD}!$&'()*+,;=:@])|[/?])*)?)$/i

它们是如何编译的( 在PHP中):


<?php

/* Regex convenience functions (character class, non-capturing group) */
/**
 * Wrap a fragment in a regex character class.
 *
 * @param string $str    Contents of the class (ranges and/or single characters).
 * @param string $suffix Quantifier or other text appended after the closing bracket.
 * @param bool   $negate When true, emit a negated class ("[^...]").
 * @return string The assembled character class followed by $suffix.
 */
function cc($str, $suffix = '', $negate = false) {
 $opening = $negate ? '[^' : '[';
 return $opening . $str . ']' . $suffix;
}
/**
 * Wrap a fragment in a non-capturing group.
 *
 * @param string $str    Sub-pattern to place inside "(?:...)".
 * @param string $suffix Quantifier or other text appended after the group.
 * @return string The assembled group followed by $suffix.
 */
function ncg($str, $suffix = '') {
 // $str is passed as an argument, never as the format, so "%" in it is safe.
 return sprintf('(?:%s)%s', $str, $suffix);
}

/* Preserved from RFC3986 */

// Building blocks taken from the RFC 3986 grammar. Every value below is a
// regex FRAGMENT (no delimiters, no anchors); they are combined further down
// into complete patterns. Only lower-case ranges are listed, so the final
// pattern has to be used with the "i" modifier.
$ALPHA = 'a-z';
$DIGIT = '0-9';
$HEXDIG = $DIGIT . 'a-f';

// The *_delims / *reserved strings are only meant to be dropped INSIDE a
// character class via cc(). Inside a class "$", "(", ")", "*" and "+" are
// literal, so the only escape $sub_delims needs is for the single quote that
// would otherwise terminate the PHP string.
$sub_delims = '!$&\'()*+,;=';
// "/" is escaped so the fragment survives a "/" pattern delimiter; "[" and "]"
// are escaped so they cannot open or close the surrounding character class.
$gen_delims = ':\\/?#\\[\\]@';
$reserved = $gen_delims . $sub_delims;
// The leading "-" is literal because it is the first character of the class.
$unreserved = '-' . $ALPHA . $DIGIT . '._~';

// "%" followed by exactly two hex digits.
$pct_encoded = '%' . cc($HEXDIG) . cc($HEXDIG);

// One IPv4 octet, 0-255, without leading zeros.
$dec_octet = ncg(implode('|', array(
 cc($DIGIT),
 cc('1-9') . cc($DIGIT),
 '1' . cc($DIGIT) . cc($DIGIT),
 '2' . cc('0-4') . cc($DIGIT),
 '25' . cc('0-5')
)));

// Four octets separated by LITERAL dots. The dot must be escaped here because
// it sits outside a character class, where a bare "." matches any character.
$IPv4address = $dec_octet . ncg('\\.' . $dec_octet, '{3}');

// h16: 1-4 hex digits; ls32: the last 32 bits of an IPv6 address, written
// either as two h16 groups or as an embedded IPv4 address.
$h16 = cc($HEXDIG, '{1,4}');
$ls32 = ncg($h16 . ':' . $h16 . '|' . $IPv4address);

// The alternatives below are laid out in the same order as the IPv6address
// rule of RFC 3986 section 3.2.2 (position of the "::" elision moves right).
$IPv6address = ncg(implode('|', array(
 ncg($h16 . ':', '{6}') . $ls32,
 '::' . ncg($h16 . ':', '{5}') . $ls32,
 ncg($h16, '?') . '::' . ncg($h16 . ':', '{4}') . $ls32,
 ncg($h16 . ':' . $h16, '?') . '::' . ncg($h16 . ':', '{3}') . $ls32,
 ncg(ncg($h16 . ':', '{0,2}') . $h16, '?') . '::' . ncg($h16 . ':', '{2}') . $ls32,
 ncg(ncg($h16 . ':', '{0,3}') . $h16, '?') . '::' . $h16 . ':' . $ls32,
 ncg(ncg($h16 . ':', '{0,4}') . $h16, '?') . '::' . $ls32,
 ncg(ncg($h16 . ':', '{0,5}') . $h16, '?') . '::' . $h16,
 ncg(ncg($h16 . ':', '{0,6}') . $h16, '?') . '::',
)));

// RFC 3986: IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ).
// The literal "." between the version and the address part is mandatory.
$IPvFuture = 'v' . cc($HEXDIG, '+') . '\\.' . cc($unreserved . $sub_delims . ':', '+');

// An IP literal is enclosed in LITERAL square brackets, so both brackets are
// escaped; unescaped they would start a character class.
$IP_literal = '\\[' . ncg(implode('|', array($IPv6address, $IPvFuture))) . '\\]';

// The port may be empty according to the grammar, hence "*" rather than "+".
$port = cc($DIGIT, '*');

// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
$scheme = cc($ALPHA) . ncg(cc('-' . $ALPHA . $DIGIT . '+.'), '*');

/* New or changed in RFC3987 */

// IRI-specific additions from RFC 3987. As above, every value is a regex
// fragment without delimiters. The \x{...} escapes name code points above
// 0xFF, so the finished pattern must be compiled with the "u" modifier in
// addition to "i", e.g.  preg_match('/^' . $IRI . '$/iu', $candidate).
// Forward slashes are written as "\/" so the fragments are safe inside the
// "/" delimiter used in that example.

// Private-use ranges; only allowed in the query component. These are
// character-class contents, so the three ranges are simply concatenated
// (a "|" here would be taken as a literal pipe character).
$iprivate = '\\x{E000}-\\x{F8FF}\\x{F0000}-\\x{FFFFD}\\x{100000}-\\x{10FFFD}';

// Non-ASCII characters permitted in IRIs (character-class contents).
$ucschar = '\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}' .
 '\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}' .
 '\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}' .
 '\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}' .
 '\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}' .
 '\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}';

$iunreserved = '-' . $ALPHA . $DIGIT . '._~' . $ucschar;

// One path character: a percent-escape or any single allowed character.
$ipchar = ncg($pct_encoded . '|' . cc($iunreserved . $sub_delims . ':@'));

// Fragment and query additionally allow "/" and "?"; the query also allows
// the private-use ranges.
$ifragment = ncg($ipchar . '|' . cc('\\/?'), '*');

$iquery = ncg($ipchar . '|' . cc($iprivate . '\\/?'), '*');

// Segment flavours: non-empty without ":", non-empty, possibly empty.
$isegment_nz_nc = ncg($pct_encoded . '|' . cc($iunreserved . $sub_delims . '@'), '+');
$isegment_nz = ncg($ipchar, '+');
$isegment = ncg($ipchar, '*');

// "Empty path" is expressed as a negative lookahead: no path character follows.
$ipath_empty = '(?!' . $ipchar . ')';
$ipath_rootless = ncg($isegment_nz) . ncg('\\/' . $isegment, '*');
$ipath_noscheme = ncg($isegment_nz_nc) . ncg('\\/' . $isegment, '*');
$ipath_absolute = '\\/' . ncg($ipath_rootless, '?');//Spec says isegment-nz *("/" isegment )
$ipath_abempty = ncg('\\/' . $isegment, '*');

// Any of the five path forms. ncg() already closes its own group, so nothing
// is appended after it (an extra ")" would unbalance the pattern).
$ipath = ncg(implode('|', array(
 $ipath_abempty,
 $ipath_absolute,
 $ipath_noscheme,
 $ipath_rootless,
 $ipath_empty
)));

// NOTE(review): RFC 3987 defines ireg-name as
// *( iunreserved / pct-encoded / sub-delims ) without "@"; the "@" below is
// kept as in the original code - confirm whether it is intentional.
$ireg_name = ncg($pct_encoded . '|' . cc($iunreserved . $sub_delims . '@'), '*');

$ihost = ncg(implode('|', array($IP_literal, $IPv4address, $ireg_name)));
$iuserinfo = ncg($pct_encoded . '|' . cc($iunreserved . $sub_delims . ':'), '*');
// [ userinfo "@" ] host [ ":" port ]
$iauthority = ncg($iuserinfo . '@', '?') . $ihost . ncg(':' . $port, '?');

// Relative references: like the hierarchical part, but a path that does not
// start with "/" must not contain ":" in its first segment (ipath-noscheme).
$irelative_part = ncg(implode('|', array(
 '\\/\\/' . $iauthority . $ipath_abempty . '',
 '' . $ipath_absolute . '',
 '' . $ipath_noscheme . '',
 '' . $ipath_empty . ''
)));

// The "?" that introduces the query is a literal and therefore escaped; the
// second argument '?' is the quantifier that makes the whole group optional.
$irelative_ref = $irelative_part . ncg('\\?' . $iquery, '?') . ncg('#' . $ifragment, '?');

$ihier_part = ncg(implode('|', array(
 '\\/\\/' . $iauthority . $ipath_abempty . '',
 '' . $ipath_absolute . '',
 '' . $ipath_rootless . '',
 '' . $ipath_empty . ''
)));

// Absolute IRI: scheme, hierarchical part and optional query - no fragment.
$absolute_IRI = $scheme . ':' . $ihier_part . ncg('\\?' . $iquery, '?');

// Full IRI: as above plus an optional fragment.
$IRI = $scheme . ':' . $ihier_part . ncg('\\?' . $iquery, '?') . ncg('#' . $ifragment, '?');

// Either a full IRI or a relative reference.
$IRI_reference = ncg($IRI . '|' . $irelative_ref);

编辑(2011年3月7日):由于 PHP 对带引号字符串中反斜杠的处理方式,上面的代码默认无法直接使用。除非反斜杠在正则表达式中具有特殊含义,否则你需要对反斜杠进行双重转义(double-escape)。你可以这样做:


// Post-processing step described in the edit note above: run the same
// search pattern over each of the three assembled IRI patterns and replace
// every match with the second argument of preg_replace().
// NOTE(review): this snippet looks damaged by the same backslash stripping
// that affected the rest of the listing - the lookbehind "(?<!)" is empty,
// the character class is unbalanced and the replacement string is empty.
// Restore the pattern and replacement from the original answer before use.
$escape_backslash = '/(?<!)(?![[]^$.|*+()QEnrtaefvdwsDWSbAZzB1-9GX]|x{[0-9a-f]{1,4}}|c[A-Z]|)/';
$absolute_IRI = preg_replace($escape_backslash, '', $absolute_IRI);
$IRI = preg_replace($escape_backslash, '', $IRI);
$IRI_reference = preg_replace($escape_backslash, '', $IRI_reference);

我刚刚写了一篇博客文章,用于识别大多数使用格式的网址,比如:

  • www.google.com
  • http://www.google.com
  • mailto:somebody@google.com
  • somebody@google.com
  • www.url-with-querystring.com/?url=has-querystring

使用的正则表达式是:


/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%\/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[\w]*))?)/

但是,我建议你转到 http://blog.mattheworiordan.com/post/13174566389/url-regular-expression-for-links-with-or-without-the 查看工作示例。

你用的是什么平台?如果使用 .NET,请使用 System.Uri.TryCreate,而不是正则表达式。

例如:


/// <summary>
/// Returns true when <paramref name="urlString"/> parses as an absolute URI
/// whose scheme is one of the explicitly allowed schemes
/// (http, https, ftp, mailto).
/// </summary>
/// <param name="urlString">Candidate URL; may be null or empty.</param>
/// <returns>True for an absolute URI with an allowed scheme, otherwise false.</returns>
static bool IsValidUrl(string urlString)
{
    Uri parsed;
    if (!Uri.TryCreate(urlString, UriKind.Absolute, out parsed))
    {
        // Not parseable as an absolute URI at all.
        return false;
    }

    // Allow-list check: anything not named here is rejected.
    string scheme = parsed.Scheme;
    if (scheme == Uri.UriSchemeHttp)
    {
        return true;
    }
    if (scheme == Uri.UriSchemeHttps)
    {
        return true;
    }
    if (scheme == Uri.UriSchemeFtp)
    {
        return true;
    }
    /*...*/
    return scheme == Uri.UriSchemeMailto;
}

//In test fixture...

[Test]
void IsValidUrl_Test()
{
    // A plain absolute http URL is accepted.
    Assert.True(IsValidUrl("http://www.example.com"));

    // A disallowed scheme, the empty string and null are all rejected,
    // checked in the same order as before.
    string[] rejected = { "javascript:alert('xss')", "", null };
    foreach (string candidate in rejected)
    {
        Assert.False(IsValidUrl(candidate));
    }
}

(感谢 @Yoshi 提供的 javascript: 技巧)

关于 eyelidness'回复的帖子"这是基于我的阅读uri的规范。": 感谢 Eyelidness,你是我寻求的完美解决方案,因为它是基于URI规范的 ! 出色的工作。: )

我必须做两个修改。 第一个获取在 PHP ( v5.2.10 ) 中正确匹配IP地址url和 preg_match() 函数的regexp 。

我不得不在管道周围的"ip地址"上添加另外一组括号:


)|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}(?#

不确定为什么。

我还把顶级域名的最小长度从 3 个字母减少到 2 个字母,以支持 .co.uk 及类似的域名。

最终代码:


/^(https?|ftp):\/\/(?# protocol
)(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+(?# username
)(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?(?# password
)@)?(?# auth requires @
)((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*(?# domain segments AND
)[a-z][a-z0-9-]*[a-z0-9](?# top level domain OR
)|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}(?#
 )(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])(?# IP address
))(:\d+)?(?# port
))(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*(?# path
)(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)(?# query string
)?)?)?(?# path and query string optional
)(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?(?# fragment
)$/i

这个修改版本没有对照 URI 规范进行检查,所以我不能保证它符合规范;修改的目的是处理本地网络环境中的 URL、两个字母的顶级域名以及其他类型的 Web URL,并让它在所使用的 PHP 环境中工作得更好。

就像 PHP代码:


// Case-insensitive PCRE pattern for http/https URLs with optional
// user:password@, a host name or dotted IPv4 address, optional port, path,
// query string and fragment. Single-quoted PHP strings keep "\d", "\.",
// "\/" and "\?" verbatim for PCRE; only the apostrophe needs "\'" so that it
// does not terminate the string.
define('URL_FORMAT',
'/^(https?):\/\/'.//protocol
'(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'.//username
'(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'.//password
'@)?(?#'.//auth requires @ ("(?#" opens an empty regex comment closed on the next line)
')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.//domain segments AND
'[a-z][a-z0-9-]*[a-z0-9]'.//top level domain OR
'|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
'(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.//IP address
')(:\d+)?'.//port
')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'.//path
'(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'.//query string
'?)?)?'.//path and query string optional
'(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'.//fragment
'$/i');

下面是一个使用 正规表达式 验证各种url的PHP测试程序:


<?php

// Case-insensitive PCRE pattern for http/https URLs with optional
// user:password@, a host name or dotted IPv4 address, optional port, path,
// query string and fragment. Single-quoted PHP strings keep "\d", "\.",
// "\/" and "\?" verbatim for PCRE; only the apostrophe needs "\'" so that it
// does not terminate the string.
define('URL_FORMAT',
'/^(https?):\/\/'.//protocol
'(([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+'.//username
'(:([a-z0-9$_\.\+!\*\'\(\),;\?&=-]|%[0-9a-f]{2})+)?'.//password
'@)?(?#'.//auth requires @ ("(?#" opens an empty regex comment closed on the next line)
')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.//domain segments AND
'[a-z][a-z0-9-]*[a-z0-9]'.//top level domain OR
'|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
'(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.//IP address
')(:\d+)?'.//port
')(((\/+([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)*'.//path
'(\?([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)'.//query string
'?)?)?'.//path and query string optional
'(#([a-z0-9$_\.\+!\*\'\(\),;:@&=-]|%[0-9a-f]{2})*)?'.//fragment
'$/i');

/**
 * Check the syntax of a URL.
 *
 * Anything that begins with "http://localhost" (compared case-insensitively)
 * is accepted outright; every other value is matched against URL_FORMAT.
 *
 * @access public
 * @param $url The URL to verify.
 * @return mixed true for the localhost shortcut, otherwise whatever
 *               preg_match() returns for URL_FORMAT.
 */
function is_valid_url($url) {
 $is_localhost = str_starts_with(strtolower($url), 'http://localhost');
 return $is_localhost ? true : preg_match(URL_FORMAT, $url);
}


// PHP 8.0 and later ship a native str_starts_with(); declaring a second one
// is a fatal "cannot redeclare" error, so the fallback is only defined when
// the function does not exist yet.
if (!function_exists('str_starts_with')) {
 /**
  * Tell whether a string starts with a given prefix.
  *
  * The comparison is byte-wise and strict, so numeric-looking strings such
  * as '10.0' and '0010' are never treated as equal. An empty needle always
  * yields true.
  *
  * @param string $string Input string
  * @param string $niddle Needle (prefix) to look for
  * @return boolean true only if $string begins with $niddle
  */
 function str_starts_with($string, $niddle) {
  return strncmp($string, $niddle, strlen($niddle)) === 0;
 }
}


// Running totals shared with test_url() through "global".
$numtests = 0;
$passed = 0;

/**
 * Test a URL for validity and count results.
 *
 * Prints one line per URL and updates the global counters $numtests and
 * $passed.
 *
 * @param url url
 * @param expected expected result (true or false)
 */
function test_url($url, $expected) {
 global $numtests, $passed;
 $numtests++;
 $valid = is_valid_url($url);
 echo"URL Valid?:". ($valid?"yes":"no")." for URL: $url. Expected:".($expected?"yes":"no").".";
 // Loose comparison on purpose: is_valid_url() may hand back the integer
 // result of preg_match() rather than a boolean.
 if($valid == $expected) {
 echo"PASS\n"; $passed++;
 } else {
 echo"FAIL\n";
 }
}

// Run every sample URL through test_url() and print a summary. All samples
// are expected to validate.
echo"URL Tests:\n\n";

test_url("http://localserver/projects/public/assets/javascript/widgets/UserBoxMenu/widget.css", true);
test_url("http://www.google.com", true);
test_url("http://www.google.co.uk/projects/my%20folder/test.php", true);
test_url("https://myserver.localdomain", true);
test_url("http://192.168.1.120/projects/index.php", true);
test_url("http://192.168.1.1/projects/index.php", true);
test_url("http://projectpier-server.localdomain/projects/public/assets/javascript/widgets/UserBoxMenu/widget.css", true);
test_url("https://2.4.168.19/project-pier?c=test&a=b", true);
test_url("https://localhost/a/b/c/test.php?c=controller&arg1=20&arg2=20", true);
test_url("http://user:password@localhost/a/b/c/test.php?c=controller&arg1=20&arg2=20", true);

echo"\n$passed out of $numtests tests passed.\n\n";

?>

再次感谢 eyelidness 正规表达式 !

下面是 RegexBuddy 使用的内容。


(\b(https?|ftp|file)://)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]

它与以下( 在 ** ** 标记内) 匹配:


**http://www.regexbuddy.com** 
**http://www.regexbuddy.com/** 
**http://www.regexbuddy.com/index.html** 
**http://www.regexbuddy.com/index.html?source=library** 

你可以下载 RegexBuddy http://www.regexbuddy.com/download.html

《获取 URL 的各个部分(正则表达式)》这篇帖子讨论了如何解析 URL 以识别它的各个组成部分。如果你只想检查一个 URL 是否格式良好(well-formed),那应该足以满足你的需求。

如果你需要检查它是否真的有效,最终你将不得不尝试访问其他终端。

一般情况下,你最好使用框架或者其他库提供给你的函数。 许多平台包括解析url的函数。 例如有 python urlparse 模块的,在. NET 中,你可以使用类的System.Uri 构造函数作为验证URL的方法。

Mathias Bynens 写过一篇很好的文章,比较了大量正则表达式:《寻找完美的 URL 验证正则表达式》(In search of the perfect URL validation regex)。

其中最好的一个有点长,但它几乎能匹配你交给它的任何 URL:


_^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS

作为参考,这里是 IETF规范 。 特别地, B. Parsing a URI Reference with a Regular Expression 在这里。这是他们提供的正规表达式:


 ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?

就像别人所说,最好把它留给你已经使用的一个 lib/框架。

这其实不是正则表达式该做的工作,而应该交给你所用语言中现有的工具来完成。你应该使用已经编写、测试并调试过的现有代码。

在PHP中,使用 parse_url 函数。

Perl: URI 模块

ruby: URI 模块

.NET: 'uri'类

正则表达式不是一根魔杖,不能对每个碰巧涉及字符串的问题都挥一挥就解决。


 /**
  * Test whether a string is an http, https or ftp URL.
  *
  * Accepted shape: scheme "://", optional "user[:password]@" parts, then a
  * dotted IPv4 address, "localhost" or a domain name ending in one of the
  * listed TLDs or any two-letter TLD, an optional ":port", and zero or more
  * "/segment" parts. An empty segment is only allowed as the very last one.
  *
  * @param {string} textval Candidate URL.
  * @returns {boolean} true when the whole string matches.
  */
 function validateURL(textval) {
   // A regex literal is used so that "\." reaches the regex engine as a
   // literal dot; inside a quoted string the backslash would be dropped and
   // "." would match any character. The "&amp;" HTML entities that had
   // leaked into the character classes are written as the plain "&" they
   // stood for.
   var urlregex = /^(http|https|ftp):\/\/([a-zA-Z0-9.-]+(:[a-zA-Z0-9.&%$-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9-]+\.)*[a-zA-Z0-9-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(:[0-9]+)*(\/($|[a-zA-Z0-9.,?'+&%$#=~_-]+))*$/;
   return urlregex.test(textval);
 }

匹配 http://site.com/dir/file.php?var=moo | ftp://user:pass@site.com:21/file/dir

Non-Matches site.com | http://site.com/dir//

...