JS RegExp

JavaScript / FastCampus

2022.02.09.

정규표현식(RegExp)

정규식, Regular Expression

생성자 방식

const regexp = new RegExp('the', 'gi')

리터럴 방식

const regexp = /the/gi

match(), test(), replace()

// 예제문자열

const str = `
010-1234-5678
thesecon@gmail.com
https://www.omdbapi.com/?apikey=7035c60c&s=frozen
The quick grown fox jumps over the lazy dog.
aabbbccccdddd
htxp
http://localhost:1234
동해물과_백두산이 마르고 닳도록
`

정규식.test(문자열) : 문자 검색

“일치 여부(Boolean)” 반환

const regexp = /fox/gi
console.log(regexp.test(str))
// true

문자열.match(정규식) : 문자 추출

일치하는 문자들을 담은 “배열(Array)” 반환

const regexp = /the/gi // global, ignoreCase
console.log(str.match(regexp))
// Array(3)
// 0: 'the'
// 1: 'The'
// 2: 'the'

문자열.replace(정규식, 대체문자) : 문자 대체

일치하는 문자를 대체한 결과 반환
원본문자열을 수정 x

const regexp = /fox/gi
console.log(str.replace(regexp, 'AAA'))
// 예제문자열의 fox가 AAA로 대체된 결과를 보여줌
// 원본은 수정 안됨!

원본문자열 수정 원한다면, “재할당” 해야됨! (단, 원본 문자열 변수는 const가 아닌 let으로 선언되어 있어야 함. const로 선언된 변수에 재할당하면 TypeError 발생)

const regexp = /fox/gi
str = str.replace(regexp, 'AAA')
// 이런식으로 재할당하면 가능!

플래그(옵션) & 패턴(표현)

g: 모든 문자 일치(global)
i: 영어 대소문자를 구분x (ignore case)
m: 여러 줄 일치(multi line)

console.log(str.match(/\.$/im))
// 만약 g 플래그 없으면
// ['.', index: 127, input: '\n010-1234-5678\nthesecon@gmail.com\nhttps://www.omdb…ddd\nhtxp\nhttp://localhost:1234\n동해물과_백두산이 마르고 닳도록\n', groups: undefined]

console.log(str.match(/\.$/gim))
// g 있으면
// ['.']
// $: 문자열의 '끝부분'을 탐색

\. : 특정 문자 ’.’를 찾음
. : 그냥 온점은 줄 구분 문자(엔터키)를 제외한 임의의 한 문자를 의미

특수기호쓰려면 '\'를 앞에 붙여야함
원래 문자열은 시작과 끝, 하나씩 존재하는데,
문자열이 엔터로 구분된 상태에서 m 플래그가 있으면,
그 줄 각각 시작과 끝부분이 존재하게 됨

만약, 문자열 끝에 . 점 있고 엔터까지 한 상태
m 플래그 없으면 $는 문자열 전체의 끝에만 일치하는데, 문자열의 끝은 엔터(줄바꿈)이므로 . 점을 못찾음
m 있으면 $가 각 줄의 끝(줄바꿈 문자 바로 앞)에도 일치하므로 . 점 찾는거 성공함

’..’

console.log(
  str.match(/h..p/g), // h와 p사이에 아무 문자가 와도 찾아짐
)
// ['http', 'htxp', 'http']

|

console.log(
  str.match(/dog|fox/), // 둘 중 먼저 찾아지는 하나만 출력됨
)
// ['fox', index: 100, input: '\n010-1234-5678\nthesecon@gmail.com\nhttps://www.omdb…ddd\nhtxp\nhttp://localhost:1234\n동해물과_백두산이 마르고 닳도록\n', groups: undefined]

\w

대소영문 + 숫자 + _ 까지

console.log(str.match(/\w{2,3}/g))
// ['010', '123', '567', 'the', 'sec', 'on', 'gma', 'il', 'com', 'htt', 'ps', 'www', 'omd', 'bap', 'com', 'api', 'key', '703', '5c6', '0c', 'fro', 'zen', 'The', 'qui', 'ck', 'gro', 'wn', 'fox', 'jum', 'ps', 'ove', 'the', 'laz', 'dog', 'aab', 'bbc', 'ccc', 'ddd', 'htx', 'htt', 'loc', 'alh', 'ost', '123']

\b

단어 경계(word boundary): 단어 문자(\w)와 그 외 문자(특수문자, 공백, 문자열의 시작/끝 등) 사이를 기준으로 구분하여 찾기

console.log(str.match(/\b\w{2,3}\b/g))
// \b: 앞에만 쓰면 3글자 이상인 문자들에서도 앞쪽기준으로 찾아짐
// 뒤에만 쓰면 3글자 이상인 문자들에서도 뒤쪽기준으로 찾아짐
// 앞, 뒤 둘다 쓰면 무조건 2글자 이상, 3글자 이하인 범위에서 '특수문자'로 구분되어진 것만 찾아짐
// ['010', 'com', 'www', 'com', 'The', 'fox', 'the', 'dog']

[가-힣], {1,}

console.log(str.match(/[가-힣]{1,}/g))
// 가 부터 힣 사이의 문자 구간(한글)
// ['동해물과', '백두산이', '마르고', '닳도록']

{3}: 3개 연속일치
{3,}: 3개 이상 연속일치
{3,5}: 3개 이상 5개 이하 연속 일치
(주의: 중괄호 안에 공백을 넣으면({3, 5}) 수량자로 인식되지 않으므로 공백 없이 작성해야 함)

f\w{1,}

console.log(
  str.match(/f\w{1,}/g), // 'f'와 그 뒤에 이어지는 단어 문자(\w)들이 찾아짐 (\b가 없으므로 단어 중간의 f부터도 일치함)
)
// ['frozen', 'fox']

\d

console.log(
  str.match(/\d{1,}/g), // 1개 이상 연속된 숫자들
)
// ['010', '1234', '5678', '7035', '60', '1234']

(?=), (?<=)

(?=): 앞쪽 일치(Lookahead)
(?<=): 뒤쪽 일치(Lookbehind)

console.log(
  str.match(/.{1,}(?=@)/g), // @ 앞쪽 일치
)
// ['thesecon']
console.log(
  str.match(/(?<=@).{1,}/g), // @ 뒤쪽 일치
)
// ['gmail.com']

\s

공백(Space, Tab, 줄바꿈 등)에 일치
replace()와 함께 사용하면?

const h = ` the hello  world  !

`

console.log(
  h.replace(/\s/g, ''), // 모든 공백들을 삭제
)
// thehelloworld!

정보 링크

정규표현식, 이렇게 시작하자!

Heropy 블로그 링크

정규표현식 README.md 정리본

Github-RegExp

정규표현식 실전 테스트

테스트공간 링크

정규표현식(RegExp)
생성자 방식
리터럴 방식
match(), test(), replace()
플래그(옵션) & 패턴(표현)
- ’..’
- |
- \w
- \b
- [가-힣], {1,}
- f\w{1,}
- \d
- (?=), (?<=)
- \s
정보 링크