1
0
Fork 0

Add support for .txt files

This commit is contained in:
jvoisin 2018-07-06 00:42:09 +02:00
parent 0638b9bbbb
commit 53271495f7
5 changed files with 30 additions and 12 deletions

View File

@ -12,8 +12,6 @@ unsupported_extensions = {
'.pot',
'.rdf',
'.srt',
'.text',
'.txt',
'.wsdl',
'.xpdl',
'.xsd',

View File

@ -1,3 +1,4 @@
import shutil
from typing import Dict
from . import abstract
@ -6,13 +7,9 @@ class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that do not contain metadata. """
mimetypes = {'text/plain', }
def __init__(self, filename: str) -> None:
super().__init__(filename)
self.filename = filename
self.output_filename = filename
def get_meta(self) -> Dict[str, str]:
    """ Return an empty mapping: this parser never reports any metadata. """
    return {}
def remove_all(self) -> bool:
    """ Copy the input file to the output path and report success.

    The content is copied unchanged; the method returns True whenever
    the copy itself does not raise.
    """
    source, destination = self.filename, self.output_filename
    # shutil.copy() carries over file data and permission bits only.
    shutil.copy(source, destination)
    return True

1
tests/data/dirty.txt Normal file
View File

@ -0,0 +1 @@
I'm a file that can't have metadata, but I'm supposed to be supported anyway.

View File

@ -18,11 +18,11 @@ class TestUnsupportedFiles(unittest.TestCase):
class TestExplicitelyUnsupportedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.txt')
parser, mimetype = parser_factory.get_parser('./tests/data/clean.txt')
self.assertEqual(mimetype, 'text/plain')
shutil.copy('./tests/test_libmat2.py', './tests/data/clean.py')
parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')
self.assertEqual(mimetype, 'text/x-python')
self.assertEqual(parser, None)
os.remove('./tests/clean.txt')
os.remove('./tests/data/clean.py')
class TestCorruptedFiles(unittest.TestCase):

View File

@ -6,7 +6,7 @@ import os
import zipfile
import tempfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
class TestParserFactory(unittest.TestCase):
@ -104,6 +104,12 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['meta:creation-date'], '2011-07-26T03:27:48')
self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
def test_txt(self):
    """ A .txt file is detected as text/plain and exposes no metadata. """
    parser, detected_type = parser_factory.get_parser('./tests/data/dirty.txt')
    self.assertEqual(detected_type, 'text/plain')
    self.assertEqual(parser.get_meta(), {})
class TestRemovingThumbnails(unittest.TestCase):
def test_odt(self):
@ -473,3 +479,19 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.odg')
os.remove('./tests/data/clean.cleaned.odg')
def test_txt(self):
    """ Cleaning a .txt file succeeds and the '.cleaned' file has no metadata. """
    working_copy = './tests/data/clean.txt'
    cleaned_copy = './tests/data/clean.cleaned.txt'
    shutil.copy('./tests/data/dirty.txt', working_copy)

    parser = harmless.HarmlessParser(working_copy)
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    # NOTE(review): the '.cleaned' file is presumably written by
    # remove_all() above -- confirm against the parser base class.
    cleaned_parser = harmless.HarmlessParser(cleaned_copy)
    self.assertEqual(cleaned_parser.get_meta(), {})

    os.remove(working_copy)
    os.remove(cleaned_copy)