【439】Tweets processing by Python

1.文本文件转 json 格式

  逐行读取 txt 文件中的 tweets 文本(每行是一条 JSON 字符串),用 json.loads 将其解析为 Python 对象(dict),既可以带缩进地打印输出,也可以进一步提取其中的详细信息

代码:

import json
import os

# Folder holding the raw tweet dump files; each line of a file is parsed
# as one JSON document below.
folderpath = r"D:\Twitter Data\Data\test"
files = os.listdir(folderpath)
os.chdir(folderpath)

# get the first txt file
# os.listdir returns every entry (including non-txt files and sub-folders)
# in arbitrary order, so filter by extension and sort to make the choice
# deterministic.
txt_files = sorted(name for name in files if name.lower().endswith(".txt"))
if not txt_files:
    raise FileNotFoundError("no .txt file found in " + folderpath)
tweets_data_path = txt_files[0]

# store json format file in this array
tweets_data = []
# Open with an explicit encoding so the result does not depend on the
# platform's locale codec, and use a context manager so the handle is
# always closed.
with open(tweets_data_path, "r", encoding="utf-8") as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)
        except json.JSONDecodeError:
            # Blank or malformed lines are skipped on purpose; any other
            # error (e.g. KeyboardInterrupt) is allowed to propagate.
            continue
        tweets_data.append(tweet)

# print json format file with indentation
if tweets_data:
    print(json.dumps(tweets_data[0], indent=4))
else:
    print("No valid JSON tweets found in " + tweets_data_path)

输出:

{
    "created_at": "Tue Jun 25 20:44:34 +0000 2019",
    "id": 1143621025550049280,
    "id_str": "1143621025550049280",
    "text": "Australia beat the Poms overnight \ud83d\ude01\ud83c\udfcf\ud83c\udde6\ud83c\uddfa\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f #AUSvENG #CmonAussie #CWC19",
    "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
    "truncated": false,
    "in_reply_to_status_id": null,
    "in_reply_to_status_id_str": null,
    "in_reply_to_user_id": null,
    "in_reply_to_user_id_str": null,
    "in_reply_to_screen_name": null,
    "user": {
        "id": 252426781,
        "id_str": "252426781",
        "name": "Willy Aitch",
        "screen_name": "WillyAitch",
        "location": "Melbourne, Victoria",
        "url": null,
        "description": "September 2017 to February 2018, was the greatest 5 months ever. Richmond \ud83d\udc2f\ud83d\udc2f\ud83d\udc2fwon the 2017 AFL Premiership! Philadelphia Eagles \ud83e\udd85\ud83e\udd85\ud83e\udd85 won Super Bowl LII",
        "translator_type": "none",
        "protected": false,
        "verified": false,
        "followers_count": 417,
        "friends_count": 1061,
        "listed_count": 15,
        "favourites_count": 18852,
        "statuses_count": 17796,
        "created_at": "Tue Feb 15 04:55:59 +0000 2011",
        "utc_offset": null,
        "time_zone": null,
        "geo_enabled": true,
        "lang": null,
        "contributors_enabled": false,
        "is_translator": false,
        "profile_background_color": "C0DEED",
        "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
        "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
        "profile_background_tile": false,
        "profile_link_color": "1DA1F2",
        "profile_sidebar_border_color": "C0DEED",
        "profile_sidebar_fill_color": "DDEEF6",
        "profile_text_color": "333333",
        "profile_use_background_image": true,
        "profile_image_url": "http://pbs.twimg.com/profile_images/1112669591342211072/rnbV0dCK_normal.jpg",
        "profile_image_url_https": "https://pbs.twimg.com/profile_images/1112669591342211072/rnbV0dCK_normal.jpg",
        "profile_banner_url": "https://pbs.twimg.com/profile_banners/252426781/1522377977",
        "default_profile": true,
        "default_profile_image": false,
        "following": null,
        "follow_request_sent": null,
        "notifications": null
    },
    "geo": null,
    "coordinates": null,
    "place": {
        "id": "01864a8a64df9dc4",
        "url": "https://api.twitter.com/1.1/geo/id/01864a8a64df9dc4.json",
        "place_type": "city",
        "name": "Melbourne",
        "full_name": "Melbourne, Victoria",
        "country_code": "AU",
        "country": "Australia",
        "bounding_box": {
            "type": "Polygon",
            "coordinates": [
                [
                    [
                        144.593742,
                        -38.433859
                    ],
                    [
                        144.593742,
                        -37.511274
                    ],
                    [
                        145.512529,
                        -37.511274
                    ],
                    [
                        145.512529,
                        -38.433859
                    ]
                ]
            ]
        },
        "attributes": {}
    },
    "contributors": null,
    "is_quote_status": false,
    "quote_count": 0,
    "reply_count": 0,
    "retweet_count": 0,
    "favorite_count": 0,
    "entities": {
        "hashtags": [
            {
                "text": "AUSvENG",
                "indices": [
                    46,
                    54
                ]
            },
            {
                "text": "CmonAussie",
                "indices": [
                    55,
                    66
                ]
            },
            {
                "text": "CWC19",
                "indices": [
                    67,
                    73
                ]
            }
        ],
        "urls": [],
        "user_mentions": [],
        "symbols": []
    },
    "favorited": false,
    "retweeted": false,
    "filter_level": "low",
    "lang": "en",
    "timestamp_ms": "1561495474599"
}

 

上一篇:洛谷P2751 工序安排Job Processing


下一篇:javascript – 从画布动态“卸载”处理JS草图